lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <19a7f45a619e55708414476164920ecf90591b04.1512170825.git.shli@fb.com>
Date:   Fri,  1 Dec 2017 15:31:50 -0800
From:   Shaohua Li <shli@...nel.org>
To:     netdev@...r.kernel.org, davem@...emloft.net
Cc:     kafai@...com, eric.dumazet@...il.com, flo@...rcot.fr,
        xiyou.wangcong@...il.com, tom@...bertland.com,
        Shaohua Li <shli@...com>
Subject: [PATCH net-next V3 3/3] net: add a sysctl to make auto flowlabel consistent

From: Shaohua Li <shli@...com>

Currently, if there is negative routing, we change the sock's txhash, so
the sock gets a different flowlabel and is routed over a different path.
According to Tom, we had better make this behavior optional, because some
routers require a consistent flowlabel. By default, we keep the flowlabel
consistent, i.e., negative routing doesn't change the flowlabel.

Suggested-by: Tom Herbert <tom@...bertland.com>
Signed-off-by: Shaohua Li <shli@...com>
---
 Documentation/networking/ip-sysctl.txt |  7 +++++++
 include/net/netns/ipv6.h               |  1 +
 include/net/sock.h                     | 28 +++++++++++++++-------------
 net/ipv6/af_inet6.c                    |  1 +
 net/ipv6/sysctl_net_ipv6.c             |  8 ++++++++
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 46c7e10..14132a0 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1345,6 +1345,13 @@ auto_flowlabels - INTEGER
 	   be disabled by the socket option
 	Default: 1
 
+consistent_auto_flowlabel - BOOLEAN
+	When auto_flowlabels is enabled, this option keeps a socket's flow label
+	consistent for the lifetime of the socket.
+	TRUE: enabled
+	FALSE: disabled
+	Default: TRUE
+
 flowlabel_state_ranges - BOOLEAN
 	Split the flow label number space into two ranges. 0-0x7FFFF is
 	reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc45..e55f851 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
 	int ip6_rt_min_advmss;
 	int flowlabel_consistency;
 	int auto_flowlabels;
+	int consistent_auto_flowlabel;
 	int icmpv6_time;
 	int anycast_src_echo_reply;
 	int ip_nonlocal_bind;
diff --git a/include/net/sock.h b/include/net/sock.h
index b9cb9d2..45e868f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1729,6 +1729,18 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
 	return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
 }
 
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+	return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+	write_pnet(&sk->sk_net, net);
+}
+
 static inline void sk_set_txhash(struct sock *sk, u32 hash)
 {
 	sk->sk_txhash = hash;
@@ -1736,7 +1748,9 @@ static inline void sk_set_txhash(struct sock *sk, u32 hash)
 
 static inline void sk_rethink_txhash(struct sock *sk)
 {
-	if (sk->sk_txhash) {
+	struct net *net = sock_net(sk);
+
+	if (sk->sk_txhash && !net->ipv6.sysctl.consistent_auto_flowlabel) {
 		u32 v = prandom_u32();
 		sk->sk_txhash = v ?: 1;
 	}
@@ -2291,18 +2305,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
 	__kfree_skb(skb);
 }
 
-static inline
-struct net *sock_net(const struct sock *sk)
-{
-	return read_pnet(&sk->sk_net);
-}
-
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
-{
-	write_pnet(&sk->sk_net, net);
-}
-
 static inline struct sock *skb_steal_sock(struct sk_buff *skb)
 {
 	if (skb->sk) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f712..fe9b312 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -807,6 +807,7 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 	net->ipv6.sysctl.flowlabel_consistency = 1;
 	net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
+	net->ipv6.sysctl.consistent_auto_flowlabel = 1;
 	net->ipv6.sysctl.idgen_retries = 3;
 	net->ipv6.sysctl.idgen_delay = 1 * HZ;
 	net->ipv6.sysctl.flowlabel_state_ranges = 0;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8a..8908092 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -126,6 +126,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "consistent_auto_flowlabel",
+		.data		= &init_net.ipv6.sysctl.consistent_auto_flowlabel,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 
@@ -190,6 +197,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
 	ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
 	ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+	ipv6_table[14].data = &net->ipv6.sysctl.consistent_auto_flowlabel;
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
-- 
2.9.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ