lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250227123137.138778-1-dongml2@chinatelecom.cn>
Date: Thu, 27 Feb 2025 20:31:37 +0800
From: Menglong Dong <menglong8.dong@...il.com>
To: edumazet@...gle.com
Cc: davem@...emloft.net,
	kuba@...nel.org,
	pabeni@...hat.com,
	horms@...nel.org,
	ncardwell@...gle.com,
	kuniyu@...zon.com,
	dsahern@...nel.org,
	kerneljasonxing@...il.com,
	yyd@...gle.com,
	dongml2@...natelecom.cn,
	petrm@...dia.com,
	netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [RFC PATCH net-next] net: ip: add sysctl_ip_reuse_exact_match

Currently, the socket lookup in inet_lhash2_lookup() terminates as soon as
it hits a matching socket with reuse port enabled, so the socket that is
returned may not be the best match.

For example, we have socket1 listening on "0.0.0.0:1234" and socket2
listening on "192.168.1.1:1234", and both of them have reuse port enabled.
Socket1 will always be matched when a connection from a peer with the IP
"192.168.1.xx" arrives if socket1 was created later than socket2. This is
not expected, as socket2 has higher priority.

This can cause unexpected behavior if TCP MD5 keys are used, as described
in Documentation/networking/vrf.rst -> Applications.

Introduce sysctl_ip_reuse_exact_match to make the lookup find the
best-matching socket when reuse port is used.

Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
---
 include/net/netns/ipv4.h    |  1 +
 net/ipv4/inet_hashtables.c  | 22 ++++++++++++++++++----
 net/ipv4/sysctl_net_ipv4.c  |  9 +++++++++
 net/ipv6/inet6_hashtables.c | 22 ++++++++++++++++++----
 4 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 45ac125e8aeb..5e4b63c40e1c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -142,6 +142,7 @@ struct netns_ipv4 {
 	u8 sysctl_ip_fwd_update_priority;
 	u8 sysctl_ip_nonlocal_bind;
 	u8 sysctl_ip_autobind_reuse;
+	u8 sysctl_ip_reuse_exact_match;
 	/* Shall we try to damage output packets if routing dev changes? */
 	u8 sysctl_ip_dynaddr;
 #ifdef CONFIG_NET_L3_MASTER_DEV
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 9bfcfd016e18..5ca495361484 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -384,20 +384,34 @@ static struct sock *inet_lhash2_lookup(const struct net *net,
 	struct sock *sk, *result = NULL;
 	struct hlist_nulls_node *node;
 	int score, hiscore = 0;
+	bool reuse_exact_match;
 
+	reuse_exact_match = READ_ONCE(net->ipv4.sysctl_ip_reuse_exact_match);
 	sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
 		score = compute_score(sk, net, hnum, daddr, dif, sdif);
 		if (score > hiscore) {
-			result = inet_lookup_reuseport(net, sk, skb, doff,
-						       saddr, sport, daddr, hnum, inet_ehashfn);
-			if (result)
-				return result;
+			if (!reuse_exact_match) {
+				result = inet_lookup_reuseport(net, sk, skb,
+							       doff, saddr,
+							       sport, daddr,
+							       hnum, inet_ehashfn);
+				if (result)
+					return result;
+			}
 
 			result = sk;
 			hiscore = score;
 		}
 	}
 
+	if (reuse_exact_match) {
+		sk = inet_lookup_reuseport(net, result, skb, doff, saddr,
+					   sport, daddr, hnum,
+					   inet_ehashfn);
+		if (sk)
+			return sk;
+	}
+
 	return result;
 }
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3a43010d726f..be93b2c22d91 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -838,6 +838,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra1         = SYSCTL_ZERO,
 		.extra2         = SYSCTL_ONE,
 	},
+	{
+		.procname	= "ip_reuse_exact_match",
+		.data		= &init_net.ipv4.sysctl_ip_reuse_exact_match,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1         = SYSCTL_ZERO,
+		.extra2         = SYSCTL_ONE,
+	},
 	{
 		.procname	= "fwmark_reflect",
 		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 9ec05e354baa..b8f130a2a135 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -157,20 +157,34 @@ static struct sock *inet6_lhash2_lookup(const struct net *net,
 	struct sock *sk, *result = NULL;
 	struct hlist_nulls_node *node;
 	int score, hiscore = 0;
+	bool reuse_exact_match;
 
+	reuse_exact_match = READ_ONCE(net->ipv4.sysctl_ip_reuse_exact_match);
 	sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
 		score = compute_score(sk, net, hnum, daddr, dif, sdif);
 		if (score > hiscore) {
-			result = inet6_lookup_reuseport(net, sk, skb, doff,
-							saddr, sport, daddr, hnum, inet6_ehashfn);
-			if (result)
-				return result;
+			if (!reuse_exact_match) {
+				result = inet6_lookup_reuseport(net, sk, skb,
+								doff, saddr,
+								sport, daddr,
+								hnum, inet6_ehashfn);
+				if (result)
+					return result;
+			}
 
 			result = sk;
 			hiscore = score;
 		}
 	}
 
+	if (reuse_exact_match) {
+		sk = inet6_lookup_reuseport(net, result, skb, doff, saddr,
+					    sport, daddr, hnum,
+					    inet6_ehashfn);
+		if (sk)
+			return sk;
+	}
+
 	return result;
 }
 
-- 
2.39.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ