lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAK6E8=fb_8Nq-LHU7dPLESBdZHM4Vhc7Vw4UHQEJPcMMVkoiWw@mail.gmail.com>
Date:   Mon, 23 Jan 2017 12:55:16 -0800
From:   Yuchung Cheng <ycheng@...gle.com>
To:     Wei Wang <tracywwnj@...il.com>
Cc:     netdev <netdev@...r.kernel.org>,
        David Miller <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Wei Wang <weiwan@...gle.com>
Subject: Re: [PATCH net-next 3/3] net/tcp-fastopen: Add new API support

On Mon, Jan 23, 2017 at 10:59 AM, Wei Wang <tracywwnj@...il.com> wrote:
> From: Wei Wang <weiwan@...gle.com>
>
> This patch adds a new socket option, TCP_FASTOPEN_CONNECT, as an
> alternative way to perform Fast Open on the active side (client). Prior
> to this patch, a client needs to replace the connect() call with
> sendto(MSG_FASTOPEN). This can be cumbersome for applications who want
> to use Fast Open: these socket operations are often done in lower layer
> libraries used by many other applications. Changing these libraries
> and/or the socket call sequences are not trivial. A more convenient
> approach is to perform Fast Open by simply enabling a socket option when
> the socket is created w/o changing other socket calls sequence:
>   s = socket()
>     create a new socket
>   setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT …);
>     newly introduced sockopt
>     If set, new functionality described below will be used.
>     Return ENOTSUPP if TFO is not supported or not enabled in the
>     kernel.
>
>   connect()
>     With cookie present, return 0 immediately.
>     With no cookie, initiate 3WHS with TFO cookie-request option and
>     return -1 with errno = EINPROGRESS.
>
>   write()/sendmsg()
>     With cookie present, send out SYN with data and return the number of
>     bytes buffered.
>     With no cookie, and 3WHS not yet completed, return -1 with errno =
>     EINPROGRESS.
>     No MSG_FASTOPEN flag is needed.
>
>   read()
>     Return -1 with errno = EWOULDBLOCK/EAGAIN if connect() is called but
>     write() is not called yet.
>     Return -1 with errno = EWOULDBLOCK/EAGAIN if connection is
>     established but no msg is received yet.
>     Return number of bytes read if socket is established and there is
>     msg received.
>
> The new API simplifies life for applications that always perform a write()
> immediately after a successful connect(). Such applications can now take
> advantage of Fast Open by merely making one new setsockopt() call at the time
> of creating the socket. Nothing else about the application's socket call
> sequence needs to change.
>
> Signed-off-by: Wei Wang <weiwan@...gle.com>
> ---
Acked-by: Yuchung Cheng <ycheng@...gle.com>

Thanks for making this happen.

>  include/linux/tcp.h      |  3 ++-
>  include/net/inet_sock.h  |  6 +++++-
>  include/net/tcp.h        |  1 +
>  include/uapi/linux/tcp.h |  1 +
>  net/ipv4/af_inet.c       | 31 ++++++++++++++++++++++++-------
>  net/ipv4/tcp.c           | 35 ++++++++++++++++++++++++++++++++++-
>  net/ipv4/tcp_fastopen.c  | 33 +++++++++++++++++++++++++++++++++
>  net/ipv4/tcp_ipv4.c      |  7 ++++++-
>  net/ipv6/tcp_ipv6.c      |  5 +++++
>  9 files changed, 111 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 5371b3d70cfe..f88f4649ba6f 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -222,7 +222,8 @@ struct tcp_sock {
>         u32     chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
>         u8      chrono_type:2,  /* current chronograph type */
>                 rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
> -               unused:5;
> +               fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
> +               unused:4;
>         u8      nonagle     : 4,/* Disable Nagle algorithm?             */
>                 thin_lto    : 1,/* Use linear timeouts for thin streams */
>                 unused1     : 1,
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index c9cff977a7fb..aa95053dfc78 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -206,7 +206,11 @@ struct inet_sock {
>                                 transparent:1,
>                                 mc_all:1,
>                                 nodefrag:1;
> -       __u8                    bind_address_no_port:1;
> +       __u8                    bind_address_no_port:1,
> +                               defer_connect:1; /* Indicates that fastopen_connect is set
> +                                                 * and cookie exists so we defer connect
> +                                                 * until first data frame is written
> +                                                 */
>         __u8                    rcv_tos;
>         __u8                    convert_csum;
>         int                     uc_index;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index de67541d7adf..6ec4ea652f3f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1495,6 +1495,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
>  void tcp_fastopen_init_key_once(bool publish);
>  bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
>                              struct tcp_fastopen_cookie *cookie);
> +bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
>  #define TCP_FASTOPEN_KEY_LENGTH 16
>
>  /* Fastopen key context */
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index c53de2691cec..6ff35eb48d10 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -116,6 +116,7 @@ enum {
>  #define TCP_SAVE_SYN           27      /* Record SYN headers for new connections */
>  #define TCP_SAVED_SYN          28      /* Get SYN headers recorded for connection */
>  #define TCP_REPAIR_WINDOW      29      /* Get/set window parameters */
> +#define TCP_FASTOPEN_CONNECT   30      /* Attempt FastOpen with connect */
>
>  struct tcp_repair_opt {
>         __u32   opt_code;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index aae410bb655a..d8a0dc076f97 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>         int err;
>         long timeo;
>
> -       if (addr_len < sizeof(uaddr->sa_family))
> -               return -EINVAL;
> +       /*
> +        * uaddr can be NULL and addr_len can be 0 if:
> +        * sk is a TCP fastopen active socket and
> +        * TCP_FASTOPEN_CONNECT sockopt is set and
> +        * we already have a valid cookie for this socket.
> +        * In this case, user can call write() after connect().
> +        * write() will invoke tcp_sendmsg_fastopen() which calls
> +        * __inet_stream_connect().
> +        */
> +       if (uaddr) {
> +               if (addr_len < sizeof(uaddr->sa_family))
> +                       return -EINVAL;
>
> -       if (uaddr->sa_family == AF_UNSPEC) {
> -               err = sk->sk_prot->disconnect(sk, flags);
> -               sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
> -               goto out;
> +               if (uaddr->sa_family == AF_UNSPEC) {
> +                       err = sk->sk_prot->disconnect(sk, flags);
> +                       sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
> +                       goto out;
> +               }
>         }
>
>         switch (sock->state) {
> @@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>                 err = -EISCONN;
>                 goto out;
>         case SS_CONNECTING:
> -               err = -EALREADY;
> +               if (inet_sk(sk)->defer_connect)
> +                       err = -EINPROGRESS;
> +               else
> +                       err = -EALREADY;
>                 /* Fall out of switch with err, set for this state */
>                 break;
>         case SS_UNCONNECTED:
> @@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>
>                 sock->state = SS_CONNECTING;
>
> +               if (!err && inet_sk(sk)->defer_connect)
> +                       goto out;
> +
>                 /* Just entered SS_CONNECTING state; the only
>                  * difference is that return value in non-blocking
>                  * case is EINPROGRESS, rather than EALREADY.
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index c43eb1a831d7..d9735b76d073 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
>
>                 if (tp->urg_data & TCP_URG_VALID)
>                         mask |= POLLPRI;
> +       } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
> +               /* Active TCP fastopen socket with defer_connect
> +                * Return POLLOUT so application can call write()
> +                * in order for kernel to generate SYN+data
> +                */
> +               mask |= POLLOUT | POLLWRNORM;
>         }
>         /* This barrier is coupled with smp_wmb() in tcp_reset() */
>         smp_rmb();
> @@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
>                                 int *copied, size_t size)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
> +       struct inet_sock *inet = inet_sk(sk);
>         int err, flags;
>
>         if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
> @@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
>         tp->fastopen_req->data = msg;
>         tp->fastopen_req->size = size;
>
> +       if (inet->defer_connect) {
> +               err = tcp_connect(sk);
> +               /* Same failure procedure as in tcp_v4/6_connect */
> +               if (err) {
> +                       tcp_set_state(sk, TCP_CLOSE);
> +                       inet->inet_dport = 0;
> +                       sk->sk_route_caps = 0;
> +               }
> +       }
>         flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
>         err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
>                                     msg->msg_namelen, flags);
> +       inet->defer_connect = 0;
>         *copied = tp->fastopen_req->copied;
>         tcp_free_fastopen_req(tp);
>         return err;
> @@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
>         lock_sock(sk);
>
>         flags = msg->msg_flags;
> -       if (flags & MSG_FASTOPEN) {
> +       if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
>                 err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
>                 if (err == -EINPROGRESS && copied_syn > 0)
>                         goto out;
> @@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>                         err = -EINVAL;
>                 }
>                 break;
> +       case TCP_FASTOPEN_CONNECT:
> +               if (val > 1 || val < 0) {
> +                       err = -EINVAL;
> +               } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
> +                       if (sk->sk_state == TCP_CLOSE)
> +                               tp->fastopen_connect = val;
> +                       else
> +                               err = -EINVAL;
> +               } else {
> +                       err = -EOPNOTSUPP;
> +               }
> +               break;
>         case TCP_TIMESTAMP:
>                 if (!tp->repair)
>                         err = -EPERM;
> @@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
>                 val = icsk->icsk_accept_queue.fastopenq.max_qlen;
>                 break;
>
> +       case TCP_FASTOPEN_CONNECT:
> +               val = tp->fastopen_connect;
> +               break;
> +
>         case TCP_TIMESTAMP:
>                 val = tcp_time_stamp + tp->tsoffset;
>                 break;
> diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
> index f90e09e1ff4c..9674bec4a0f8 100644
> --- a/net/ipv4/tcp_fastopen.c
> +++ b/net/ipv4/tcp_fastopen.c
> @@ -346,3 +346,36 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
>         }
>         return cookie->len > 0;
>  }
> +
> +/* This function checks if we want to defer sending SYN until the first
> + * write().  We defer under the following conditions:
> + * 1. fastopen_connect sockopt is set
> + * 2. we have a valid cookie
> + * Return value: return true if we want to defer until application writes data
> + *               return false if we want to send out SYN immediately
> + */
> +bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
> +{
> +       struct tcp_fastopen_cookie cookie = { .len = 0 };
> +       struct tcp_sock *tp = tcp_sk(sk);
> +       u16 mss;
> +
> +       if (tp->fastopen_connect && !tp->fastopen_req) {
> +               if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
> +                       inet_sk(sk)->defer_connect = 1;
> +                       return true;
> +               }
> +
> +               /* Alloc fastopen_req in order for FO option to be included
> +                * in SYN
> +                */
> +               tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
> +                                          sk->sk_allocation);
> +               if (tp->fastopen_req)
> +                       tp->fastopen_req->cookie = cookie;
> +               else
> +                       *err = -ENOBUFS;
> +       }
> +       return false;
> +}
> +EXPORT_SYMBOL(tcp_fastopen_defer_connect);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index f7325b25b06e..27b726c96459 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>         /* OK, now commit destination to socket.  */
>         sk->sk_gso_type = SKB_GSO_TCPV4;
>         sk_setup_caps(sk, &rt->dst);
> +       rt = NULL;
>
>         if (!tp->write_seq && likely(!tp->repair))
>                 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
> @@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>
>         inet->inet_id = tp->write_seq ^ jiffies;
>
> +       if (tcp_fastopen_defer_connect(sk, &err))
> +               return err;
> +       if (err)
> +               goto failure;
> +
>         err = tcp_connect(sk);
>
> -       rt = NULL;
>         if (err)
>                 goto failure;
>
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 0b7cd3d009b6..95c05e5293b1 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
>                                                              inet->inet_dport,
>                                                              &tp->tsoffset);
>
> +       if (tcp_fastopen_defer_connect(sk, &err))
> +               return err;
> +       if (err)
> +               goto late_failure;
> +
>         err = tcp_connect(sk);
>         if (err)
>                 goto late_failure;
> --
> 2.11.0.483.g087da7b7c-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ