lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEfhGixdnn32bw=BWYQqG5KirdmA8rNZJEYG4MhCpLZGm7npvA@mail.gmail.com>
Date:   Wed, 21 Dec 2016 11:49:09 -0500
From:   Craig Gallek <kraigatgoog@...il.com>
To:     Josef Bacik <jbacik@...com>
Cc:     David Miller <davem@...emloft.net>,
        Hannes Frederic Sowa <hannes@...essinduktion.org>,
        Eric Dumazet <eric.dumazet@...il.com>,
        Tom Herbert <tom@...bertland.com>,
        netdev <netdev@...r.kernel.org>, kernel-team@...com
Subject: Re: [PATCH 5/5 net-next] inet: reset tb->fastreuseport when adding a
 reuseport sk

On Tue, Dec 20, 2016 at 3:07 PM, Josef Bacik <jbacik@...com> wrote:
> If we have non reuseport sockets on a tb we will set tb->fastreuseport to 0 and
> never set it again.  Which means that in the future if we end up adding a bunch
> of reuseport sk's to that tb we'll have to do the expensive scan every time.
> Instead add a sock_common to the tb so we know what reuseport sk succeeded last.
> Once one sk has made it onto the list we know that there are no potential bind
> conflicts on the owners list that match that sk's rcv_addr.  So copy the sk's
> common into our tb->fastsock and set tb->fastruseport to FASTREUSESOCK_STRICT so
> we know we have to do an extra check for subsequent reuseport sockets and skip
> the expensive bind conflict check.
>
> Signed-off-by: Josef Bacik <jbacik@...com>
> ---
>  include/net/inet_hashtables.h   |  4 ++++
>  net/ipv4/inet_connection_sock.c | 53 +++++++++++++++++++++++++++++++++++++----
>  2 files changed, 53 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
> index 50f635c..b776401 100644
> --- a/include/net/inet_hashtables.h
> +++ b/include/net/inet_hashtables.h
> @@ -74,12 +74,16 @@ struct inet_ehash_bucket {
>   * users logged onto your box, isn't it nice to know that new data
>   * ports are created in O(1) time?  I thought so. ;-)  -DaveM
>   */
> +#define FASTREUSEPORT_ANY      1
> +#define FASTREUSEPORT_STRICT   2
> +
>  struct inet_bind_bucket {
>         possible_net_t          ib_net;
>         unsigned short          port;
>         signed char             fastreuse;
>         signed char             fastreuseport;
>         kuid_t                  fastuid;
> +       struct sock_common      fastsock;
>         int                     num_owners;
>         struct hlist_node       node;
>         struct hlist_head       owners;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index d3ccf62..9e29fad 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -164,6 +164,32 @@ success:
>         return head;
>  }
>
> +static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
> +                                    struct sock *sk)
> +{
> +       struct sock *sk2 = (struct sock *)&tb->fastsock;
> +       kuid_t uid = sock_i_uid(sk);
> +
> +       if (tb->fastreuseport <= 0)
> +               return 0;
> +       if (!sk->sk_reuseport)
> +               return 0;
> +       if (rcu_access_pointer(sk->sk_reuseport_cb))
> +               return 0;
> +       if (!uid_eq(tb->fastuid, uid))
> +               return 0;
> +       /* We only need to check the rcv_saddr if this tb was once marked
> +        * without fastreuseport and then was reset, as we can only know that
> +        * the fastsock has no potential bind conflicts with the rest of the
> +        * possible socks on the owners list.
> +        */
> +       if (tb->fastreuseport == FASTREUSEPORT_ANY)
> +               return 1;
> +       if (!inet_csk(sk)->icsk_af_ops->rcv_saddr_equal(sk, sk2, true))
The rcv_saddr_equal functions assume the type of the sk to be
inet_sock.  It doesn't look like they actually depend on any of the
inet-specific fields, but it's probably safer to either remove this
assumption or change the type of tb->fastsock to be a full inet_sock.

> +               return 0;
> +       return 1;
> +}
> +
>  /* Obtain a reference to a local port for the given sock,
>   * if snum is zero it means select any available local port.
>   * We try to allocate an odd port (and leave even ports for connect())
> @@ -206,9 +232,7 @@ tb_found:
>                         goto success;
>
>                 if ((tb->fastreuse > 0 && reuse) ||
> -                    (tb->fastreuseport > 0 &&
> -                     !rcu_access_pointer(sk->sk_reuseport_cb) &&
> -                     sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
> +                   sk_reuseport_match(tb, sk))
>                         goto success;
>                 if (inet_csk_bind_conflict(sk, tb, true, true))
>                         goto fail_unlock;
> @@ -220,14 +244,35 @@ success:
>                 if (sk->sk_reuseport) {
>                         tb->fastreuseport = 1;
>                         tb->fastuid = uid;
> +                       memcpy(&tb->fastsock, &sk->__sk_common,
> +                              sizeof(struct sock_common));
>                 } else {
>                         tb->fastreuseport = 0;
>                 }
>         } else {
>                 if (!reuse)
>                         tb->fastreuse = 0;
> -               if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
> +               if (sk->sk_reuseport) {
> +                       /* We didn't match or we don't have fastreuseport set on
> +                        * the tb, but we have sk_reuseport set on this socket
> +                        * and we know that there are no bind conflicts with
> +                        * this socket in this tb, so reset our tb's reuseport
> +                        * settings so that any subsequent sockets that match
> +                        * our current socket will be put on the fast path.
> +                        *
> +                        * If we reset we need to set FASTREUSEPORT_STRICT so we
> +                        * do extra checking for all subsequent sk_reuseport
> +                        * socks.
> +                        */
> +                       if (!sk_reuseport_match(tb, sk)) {
> +                               tb->fastreuseport = FASTREUSEPORT_STRICT;
> +                               tb->fastuid = uid;
> +                               memcpy(&tb->fastsock, &sk->__sk_common,
> +                                      sizeof(struct sock_common));
> +                       }
> +               } else {
>                         tb->fastreuseport = 0;
> +               }
>         }
>         if (!inet_csk(sk)->icsk_bind_hash)
>                 inet_bind_hash(sk, tb, port);
> --
> 2.9.3
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ