[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALx6S346BsdtDHStY=vxa22oSRPWpBk4ZCoGrL=XQUHno0GZgw@mail.gmail.com>
Date: Fri, 1 Dec 2017 17:14:57 -0800
From: Tom Herbert <tom@...bertland.com>
To: Shaohua Li <shli@...nel.org>
Cc: Linux Kernel Network Developers <netdev@...r.kernel.org>,
"David S. Miller" <davem@...emloft.net>, Martin Lau <kafai@...com>,
Eric Dumazet <eric.dumazet@...il.com>, flo@...rcot.fr,
Cong Wang <xiyou.wangcong@...il.com>, Shaohua Li <shli@...com>
Subject: Re: [PATCH net-next V3 3/3] net: add a sysctl to make auto flowlabel consistent
On Fri, Dec 1, 2017 at 3:31 PM, Shaohua Li <shli@...nel.org> wrote:
> From: Shaohua Li <shli@...com>
>
> Currently, if there is negative routing, we change the sock's txhash, so
> the sock gets a different flowlabel and is routed over a different path.
> According to Tom, we had better make this behavior optional, because some
> routers require the flowlabel to be consistent. By default, we maintain a
> consistent flowlabel, i.e., negative routing doesn't change the flowlabel.
>
> Suggested-by: Tom Herbert <tom@...bertland.com>
> Signed-off-by: Shaohua Li <shli@...com>
> ---
> Documentation/networking/ip-sysctl.txt | 7 +++++++
> include/net/netns/ipv6.h | 1 +
> include/net/sock.h | 28 +++++++++++++++-------------
> net/ipv6/af_inet6.c | 1 +
> net/ipv6/sysctl_net_ipv6.c | 8 ++++++++
> 5 files changed, 32 insertions(+), 13 deletions(-)
>
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index 46c7e10..14132a0 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -1345,6 +1345,13 @@ auto_flowlabels - INTEGER
> be disabled by the socket option
> Default: 1
>
> +consistent_auto_flowlabel - BOOLEAN
I think we should call it consistent_txhash since this isn't just
about the flow label.
> + When auto_flowlabels is enabled, this option keeps a socket's flowlabel
> + consistent for the lifetime of the socket.
> + TRUE: enabled
> + FALSE: disabled
> + Default: TRUE
> +
> flowlabel_state_ranges - BOOLEAN
> Split the flow label number space into two ranges. 0-0x7FFFF is
> reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF
> diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
> index 987cc45..e55f851 100644
> --- a/include/net/netns/ipv6.h
> +++ b/include/net/netns/ipv6.h
> @@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
> int ip6_rt_min_advmss;
> int flowlabel_consistency;
> int auto_flowlabels;
> + int consistent_auto_flowlabel;
> int icmpv6_time;
> int anycast_src_echo_reply;
> int ip_nonlocal_bind;
> diff --git a/include/net/sock.h b/include/net/sock.h
> index b9cb9d2..45e868f 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1729,6 +1729,18 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
> return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
> }
>
> +static inline
> +struct net *sock_net(const struct sock *sk)
> +{
> + return read_pnet(&sk->sk_net);
> +}
> +
> +static inline
> +void sock_net_set(struct sock *sk, struct net *net)
> +{
> + write_pnet(&sk->sk_net, net);
> +}
> +
> static inline void sk_set_txhash(struct sock *sk, u32 hash)
> {
> sk->sk_txhash = hash;
> @@ -1736,7 +1748,9 @@ static inline void sk_set_txhash(struct sock *sk, u32 hash)
>
> static inline void sk_rethink_txhash(struct sock *sk)
> {
> - if (sk->sk_txhash) {
> + struct net *net = sock_net(sk);
> +
> + if (sk->sk_txhash && !net->ipv6.sysctl.consistent_auto_flowlabel) {
> u32 v = prandom_u32();
> sk->sk_txhash = v ?: 1;
> }
> @@ -2291,18 +2305,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
> __kfree_skb(skb);
> }
>
> -static inline
> -struct net *sock_net(const struct sock *sk)
> -{
> - return read_pnet(&sk->sk_net);
> -}
> -
> -static inline
> -void sock_net_set(struct sock *sk, struct net *net)
> -{
> - write_pnet(&sk->sk_net, net);
> -}
> -
> static inline struct sock *skb_steal_sock(struct sk_buff *skb)
> {
> if (skb->sk) {
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index c26f712..fe9b312 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -807,6 +807,7 @@ static int __net_init inet6_net_init(struct net *net)
> net->ipv6.sysctl.icmpv6_time = 1*HZ;
> net->ipv6.sysctl.flowlabel_consistency = 1;
> net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
> + net->ipv6.sysctl.consistent_auto_flowlabel = 1;
> net->ipv6.sysctl.idgen_retries = 3;
> net->ipv6.sysctl.idgen_delay = 1 * HZ;
> net->ipv6.sysctl.flowlabel_state_ranges = 0;
> diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
> index a789a8a..8908092 100644
> --- a/net/ipv6/sysctl_net_ipv6.c
> +++ b/net/ipv6/sysctl_net_ipv6.c
> @@ -126,6 +126,13 @@ static struct ctl_table ipv6_table_template[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec
> },
> + {
> + .procname = "consistent_auto_flowlabel",
> + .data = &init_net.ipv6.sysctl.consistent_auto_flowlabel,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec
> + },
> { }
> };
>
> @@ -190,6 +197,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
> ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
> ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
> ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
> + ipv6_table[14].data = &net->ipv6.sysctl.consistent_auto_flowlabel;
>
> ipv6_route_table = ipv6_route_sysctl_init(net);
> if (!ipv6_route_table)
> --
> 2.9.5
>
Powered by blists - more mailing lists