[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <19a7f45a619e55708414476164920ecf90591b04.1512170825.git.shli@fb.com>
Date: Fri, 1 Dec 2017 15:31:50 -0800
From: Shaohua Li <shli@...nel.org>
To: netdev@...r.kernel.org, davem@...emloft.net
Cc: kafai@...com, eric.dumazet@...il.com, flo@...rcot.fr,
xiyou.wangcong@...il.com, tom@...bertland.com,
Shaohua Li <shli@...com>
Subject: [PATCH net-next V3 3/3] net: add a sysctl to make auto flowlabel consistent
From: Shaohua Li <shli@...com>
Currently, if there is negative routing, we change the sock's txhash, so the
sock will have a different flowlabel and be routed over a different path.
According to Tom, we had better have an option to enable this, because some
routers require the flowlabel to be consistent. By default, we maintain a
consistent flowlabel, i.e., negative routing doesn't change the flowlabel.
Suggested-by: Tom Herbert <tom@...bertland.com>
Signed-off-by: Shaohua Li <shli@...com>
---
Documentation/networking/ip-sysctl.txt | 7 +++++++
include/net/netns/ipv6.h | 1 +
include/net/sock.h | 28 +++++++++++++++-------------
net/ipv6/af_inet6.c | 1 +
net/ipv6/sysctl_net_ipv6.c | 8 ++++++++
5 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 46c7e10..14132a0 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1345,6 +1345,13 @@ auto_flowlabels - INTEGER
be disabled by the socket option
Default: 1
+consistent_auto_flowlabel - BOOLEAN
+ When auto_flowlabels is enabled, this option keeps a socket's
+ flowlabel consistent for the lifetime of the socket.
+ TRUE: enabled
+ FALSE: disabled
+ Default: TRUE
+
flowlabel_state_ranges - BOOLEAN
Split the flow label number space into two ranges. 0-0x7FFFF is
reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc45..e55f851 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_min_advmss;
int flowlabel_consistency;
int auto_flowlabels;
+ int consistent_auto_flowlabel;
int icmpv6_time;
int anycast_src_echo_reply;
int ip_nonlocal_bind;
diff --git a/include/net/sock.h b/include/net/sock.h
index b9cb9d2..45e868f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1729,6 +1729,18 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
}
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+ return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+ write_pnet(&sk->sk_net, net);
+}
+
static inline void sk_set_txhash(struct sock *sk, u32 hash)
{
sk->sk_txhash = hash;
@@ -1736,7 +1748,9 @@ static inline void sk_set_txhash(struct sock *sk, u32 hash)
static inline void sk_rethink_txhash(struct sock *sk)
{
- if (sk->sk_txhash) {
+ struct net *net = sock_net(sk);
+
+ if (sk->sk_txhash && !net->ipv6.sysctl.consistent_auto_flowlabel) {
u32 v = prandom_u32();
sk->sk_txhash = v ?: 1;
}
@@ -2291,18 +2305,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-static inline
-struct net *sock_net(const struct sock *sk)
-{
- return read_pnet(&sk->sk_net);
-}
-
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
-{
- write_pnet(&sk->sk_net, net);
-}
-
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (skb->sk) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f712..fe9b312 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -807,6 +807,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
+ net->ipv6.sysctl.consistent_auto_flowlabel = 1;
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
net->ipv6.sysctl.flowlabel_state_ranges = 0;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8a..8908092 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -126,6 +126,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "consistent_auto_flowlabel",
+ .data = &init_net.ipv6.sysctl.consistent_auto_flowlabel,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -190,6 +197,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+ ipv6_table[14].data = &net->ipv6.sysctl.consistent_auto_flowlabel;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
--
2.9.5
Powered by blists - more mailing lists