lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20250618030537.28394-1-yangfeng59949@163.com>
Date: Wed, 18 Jun 2025 11:05:37 +0800
From: Feng Yang <yangfeng59949@....com>
To: davem@...emloft.net,
	edumazet@...gle.com,
	kuba@...nel.org,
	pabeni@...hat.com,
	horms@...nel.org,
	willemb@...gle.com,
	almasrymina@...gle.com,
	kerneljasonxing@...il.com,
	ebiggers@...gle.com,
	asml.silence@...il.com,
	aleksander.lobakin@...el.com
Cc: yangfeng@...inos.cn,
	netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [RFC PATCH net-next] skbuff: Improve the sending efficiency of __skb_send_sock

From: Feng Yang <yangfeng@...inos.cn>

Aggregate the skb data into a bvec array so that it can be handed to sendmsg in one go.
When sockmap is used to forward large packets, what previously required multiple transmissions now needs only a single transmission, which significantly improves performance.
For small packets, performance remains comparable to the original implementation.

Signed-off-by: Feng Yang <yangfeng@...inos.cn>
---
 net/core/skbuff.c | 110 ++++++++++++++++++++++------------------------
 1 file changed, 52 insertions(+), 58 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 85fc82f72d26..19d78285a1c9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3235,82 +3235,75 @@ typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
 static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
 			   int len, sendmsg_func sendmsg, int flags)
 {
-	unsigned int orig_len = len;
 	struct sk_buff *head = skb;
 	unsigned short fragidx;
-	int slen, ret;
+	struct msghdr msg;
+	struct bio_vec *bvec;
+	int max_vecs, ret;
+	int bvec_count = 0;
+	unsigned int copied = 0;
+
+	max_vecs = skb_shinfo(skb)->nr_frags + 1; // +1 for linear data
+	if (skb_has_frag_list(skb)) {
+		struct sk_buff *frag_skb = skb_shinfo(skb)->frag_list;
+
+		while (frag_skb) {
+			max_vecs += skb_shinfo(frag_skb)->nr_frags + 1; // +1 for linear data
+			frag_skb = frag_skb->next;
+		}
+	}
+
+	bvec = kcalloc(max_vecs, sizeof(struct bio_vec), GFP_KERNEL);
+	if (!bvec)
+		return -ENOMEM;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | flags;
 
 do_frag_list:
 
 	/* Deal with head data */
-	while (offset < skb_headlen(skb) && len) {
-		struct kvec kv;
-		struct msghdr msg;
-
-		slen = min_t(int, len, skb_headlen(skb) - offset);
-		kv.iov_base = skb->data + offset;
-		kv.iov_len = slen;
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_flags = MSG_DONTWAIT | flags;
+	if (offset < skb_headlen(skb)) {
+		unsigned int copy_len = min(skb_headlen(skb) - offset, len - copied);
+		struct page *page = virt_to_page(skb->data + offset);
+		unsigned int page_offset = offset_in_page(skb->data + offset);
 
-		iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
-		ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-				      sendmsg_unlocked, sk, &msg);
-		if (ret <= 0)
-			goto error;
+		if (!sendpage_ok(page))
+			msg.msg_flags &= ~MSG_SPLICE_PAGES;
 
-		offset += ret;
-		len -= ret;
+		bvec_set_page(&bvec[bvec_count++], page, copy_len, page_offset);
+		copied += copy_len;
+		offset += copy_len;
 	}
 
-	/* All the data was skb head? */
-	if (!len)
-		goto out;
-
 	/* Make offset relative to start of frags */
 	offset -= skb_headlen(skb);
 
-	/* Find where we are in frag list */
-	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+	if (copied < len) {
+		for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+			skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+			unsigned int frag_size = skb_frag_size(frag);
 
-		if (offset < skb_frag_size(frag))
-			break;
-
-		offset -= skb_frag_size(frag);
-	}
-
-	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
-
-		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
+			/* Find where we are in frag list */
+			if (offset >= frag_size) {
+				offset -= frag_size;
+				continue;
+			}
 
-		while (slen) {
-			struct bio_vec bvec;
-			struct msghdr msg = {
-				.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT |
-					     flags,
-			};
+			unsigned int copy_len = min(frag_size - offset, len - copied);
 
-			bvec_set_page(&bvec, skb_frag_page(frag), slen,
+			bvec_set_page(&bvec[bvec_count++], skb_frag_page(frag), copy_len,
 				      skb_frag_off(frag) + offset);
-			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
-				      slen);
 
-			ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-					      sendmsg_unlocked, sk, &msg);
-			if (ret <= 0)
-				goto error;
+			copied += copy_len;
+			offset = 0;
 
-			len -= ret;
-			offset += ret;
-			slen -= ret;
+			if (copied >= len)
+				break;
 		}
-
-		offset = 0;
 	}
 
-	if (len) {
+	if (copied < len) {
 		/* Process any frag lists */
 
 		if (skb == head) {
@@ -3324,11 +3317,12 @@ static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
 		}
 	}
 
-out:
-	return orig_len - len;
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bvec_count, len);
+	ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg);
+
+	kfree(bvec);
 
-error:
-	return orig_len == len ? ret : orig_len - len;
+	return ret;
 }
 
 /* Send skb data on a socket. Socket must be locked. */
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ