lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 1 Dec 2017 13:15:48 -0800
From:   Tom Herbert <tom@...bertland.com>
To:     Shaohua Li <shli@...nel.org>
Cc:     Linux Kernel Network Developers <netdev@...r.kernel.org>,
        "David S. Miller" <davem@...emloft.net>, Martin Lau <kafai@...com>,
        Eric Dumazet <eric.dumazet@...il.com>, flo@...rcot.fr,
        Cong Wang <xiyou.wangcong@...il.com>, Shaohua Li <shli@...com>
Subject: Re: [PATCH net-next V2 1/2] net-next: use five-tuple hash for sk_txhash

On Fri, Dec 1, 2017 at 1:00 PM, Shaohua Li <shli@...nel.org> wrote:
> From: Shaohua Li <shli@...com>
>
> We are using sk_txhash to calculate flowlabel, but sk_txhash isn't
> always available, for example, in inet_timewait_sock. This causes
> problem for reset packet, which will have a different flowlabel. This
> causes our router doesn't correctly close tcp connection. We are using
> flowlabel to do load balance. Routers in the path maintain connection
> state. So if flow label changes, the packet is routed through a
> different router. In this case, the old router doesn't get the reset
> packet to close the tcp connection.
>
> Per Tom's suggestion, we switch back to five-tuple hash, so we can
> reconstruct correct flowlabel for reset packet.
>
Thanks for doing this!

> At most places, we already have the flowi info, so we directly use it
> build sk_txhash. For synack, we do this after route search. At that
> time, we have the flowi info ready, so don't need to create the flowi
> info again.
>
> I don't change sk_rethink_txhash() though, it still uses random hash,
> which is the whole point to select a different path after a negative
> routing advise.
>
> Cc: Martin KaFai Lau <kafai@...com>
> Cc: Eric Dumazet <eric.dumazet@...il.com>
> Cc: Florent Fourcot <flo@...rcot.fr>
> Cc: Cong Wang <xiyou.wangcong@...il.com>
> Cc: Tom Herbert <tom@...bertland.com>
> Signed-off-by: Shaohua Li <shli@...com>
> ---
>  include/net/sock.h    | 18 ++++--------------
>  include/net/tcp.h     |  2 +-
>  net/ipv4/datagram.c   |  2 +-
>  net/ipv4/syncookies.c |  4 +++-
>  net/ipv4/tcp_input.c  |  1 -
>  net/ipv4/tcp_ipv4.c   | 17 ++++++++++++-----
>  net/ipv4/tcp_output.c |  1 -
>  net/ipv6/datagram.c   |  4 +++-
>  net/ipv6/syncookies.c |  3 ++-
>  net/ipv6/tcp_ipv6.c   | 18 +++++++++++++-----
>  10 files changed, 39 insertions(+), 31 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 79e1a2c..640db0f 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1729,22 +1729,12 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
>         return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
>  }
>
> -static inline u32 net_tx_rndhash(void)
> -{
> -       u32 v = prandom_u32();
> -
> -       return v ?: 1;
> -}
> -
> -static inline void sk_set_txhash(struct sock *sk)
> -{
> -       sk->sk_txhash = net_tx_rndhash();
> -}
> -
>  static inline void sk_rethink_txhash(struct sock *sk)
>  {
> -       if (sk->sk_txhash)
> -               sk_set_txhash(sk);
> +       if (sk->sk_txhash) {
> +               u32 v = prandom_u32();
> +               sk->sk_txhash = v ?: 1;
> +       }

We'll need to add configuration about whether rethink is done at all.
Conservative approach is probably to disable it by default. That is
the default behavior of the stack is that flow label is consistent for
lifetime of a flow.

>  }
>
>  static inline struct dst_entry *
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 4e09398..a5c28be 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1840,7 +1840,7 @@ struct tcp_request_sock_ops {
>                                  __u16 *mss);
>  #endif
>         struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
> -                                      const struct request_sock *req);
> +                                      struct request_sock *req);
>         u32 (*init_seq)(const struct sk_buff *skb);
>         u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
>         int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index f915abf..ed9ccb7 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
>         inet->inet_daddr = fl4->daddr;
>         inet->inet_dport = usin->sin_port;
>         sk->sk_state = TCP_ESTABLISHED;
> -       sk_set_txhash(sk);
> +       sk->sk_txhash = get_hash_from_flowi4(fl4);

Maybe keep sk_set_txhash but add an argument that gives the hash.
Hiding behind a function gives us the place to add/change logic in the
future.

>         inet->inet_id = jiffies;
>
>         sk_dst_set(sk, &rt->dst);
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index fda37f2..76f1cf6 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -335,7 +335,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
>         treq->rcv_isn           = ntohl(th->seq) - 1;
>         treq->snt_isn           = cookie;
>         treq->ts_off            = 0;
> -       treq->txhash            = net_tx_rndhash();
>         req->mss                = mss;
>         ireq->ir_num            = ntohs(th->dest);
>         ireq->ir_rmt_port       = th->source;
> @@ -376,6 +375,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
>                            opt->srr ? opt->faddr : ireq->ir_rmt_addr,
>                            ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
>         security_req_classify_flow(req, flowi4_to_flowi(&fl4));
> +
> +       treq->txhash = get_hash_from_flowi4(&fl4);
> +
>         rt = ip_route_output_key(sock_net(sk), &fl4);
>         if (IS_ERR(rt)) {
>                 reqsk_free(req);
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 734cfc8..e886c28 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -6288,7 +6288,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>         }
>
>         tcp_rsk(req)->snt_isn = isn;
> -       tcp_rsk(req)->txhash = net_tx_rndhash();
>         tcp_openreq_init_rwin(req, sk, dst);
>         if (!want_cookie) {
>                 tcp_reqsk_record_syn(sk, req, skb);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index c6bc0c4..0e71b1b 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>         if (err)
>                 goto failure;
>
> -       sk_set_txhash(sk);
> +       sk->sk_txhash = get_hash_from_flowi4(fl4);
>
>         rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
>                                inet->inet_sport, inet->inet_dport, sk);
> @@ -871,8 +871,12 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
>         struct sk_buff *skb;
>
>         /* First, grab a route. */
> -       if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
> -               return -1;
> +       if (!dst) {
> +               if ((dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
> +                       return -1;
> +
> +               tcp_rsk(req)->txhash = get_hash_from_flowi4(&fl4);
> +       }
>
>         skb = tcp_make_synack(sk, dst, req, foc, synack_type);
>
> @@ -1278,9 +1282,12 @@ static void tcp_v4_init_req(struct request_sock *req,
>
>  static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
>                                           struct flowi *fl,
> -                                         const struct request_sock *req)
> +                                         struct request_sock *req)
>  {
> -       return inet_csk_route_req(sk, &fl->u.ip4, req);
> +       struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
> +       if (dst)
> +               tcp_rsk(req)->txhash = get_hash_from_flowi4(&fl->u.ip4);

This is also apprearing enough to maybe warrant a function to set hash
in a request_sock.

> +       return dst;
>  }
>
>  struct request_sock_ops tcp_request_sock_ops __read_mostly = {
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index a4d214c..94e56cb 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -3752,7 +3752,6 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
>         struct flowi fl;
>         int res;
>
> -       tcp_rsk(req)->txhash = net_tx_rndhash();
>         res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
>         if (!res) {
>                 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index a1f9187..c13286c 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -150,6 +150,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
>         int                     addr_type;
>         int                     err;
>         __be32                  fl6_flowlabel = 0;
> +       struct flowi6           fl6;
>
>         if (usin->sin6_family == AF_INET) {
>                 if (__ipv6_only_sock(sk))
> @@ -260,7 +261,8 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
>         }
>
>         sk->sk_state = TCP_ESTABLISHED;
> -       sk_set_txhash(sk);
> +       ip6_datagram_flow_key_init(&fl6, sk);
> +       sk->sk_txhash = get_hash_from_flowi6(&fl6);
>  out:
>         return err;
>  }
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index e7a3a6b..3ba9401 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -216,7 +216,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
>         treq->rcv_isn = ntohl(th->seq) - 1;
>         treq->snt_isn = cookie;
>         treq->ts_off = 0;
> -       treq->txhash = net_tx_rndhash();
>
>         /*
>          * We need to lookup the dst_entry to get the correct window size.
> @@ -238,6 +237,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
>                 fl6.flowi6_uid = sk->sk_uid;
>                 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
>
> +               treq->txhash = get_hash_from_flowi6(&fl6);
> +
>                 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
>                 if (IS_ERR(dst))
>                         goto out_free;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 6bb98c9..a1a5802 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -286,7 +286,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
>         if (err)
>                 goto late_failure;
>
> -       sk_set_txhash(sk);
> +       sk->sk_txhash = get_hash_from_flowi6(&fl6);
>
>         if (likely(!tp->repair)) {
>                 if (!tp->write_seq)
> @@ -470,9 +470,13 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
>         int err = -ENOMEM;
>
>         /* First, grab a route. */
> -       if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
> +       if (!dst) {
> +               if ((dst = inet6_csk_route_req(sk, fl6, req,
>                                                IPPROTO_TCP)) == NULL)
> -               goto done;
> +                       goto done;
> +
> +               tcp_rsk(req)->txhash = get_hash_from_flowi6(fl6);
> +       }
>
>         skb = tcp_make_synack(sk, dst, req, foc, synack_type);
>
> @@ -743,9 +747,13 @@ static void tcp_v6_init_req(struct request_sock *req,
>
>  static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
>                                           struct flowi *fl,
> -                                         const struct request_sock *req)
> +                                         struct request_sock *req)
>  {
> -       return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
> +       struct dst_entry *dst;
> +       dst = inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
> +       if (dst)
> +               tcp_rsk(req)->txhash = get_hash_from_flowi6(&fl->u.ip6);
> +       return dst;
>  }
>
>  struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
> --
> 2.9.5
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ