lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAK6E8=cXin1rSvQNMPife692wkd5tswsR_7RVG6FNB0rqymHeg@mail.gmail.com>
Date:	Wed, 28 Nov 2012 20:01:44 +0800
From:	Yuchung Cheng <ycheng@...gle.com>
To:	elelueck@...ux.vnet.ibm.com
Cc:	netdev@...r.kernel.org, frankbla@...ibm.com, raspl@...ibm.com,
	ubacher@...ibm.com, samudrala@...ibm.com, davem@...emloft.net
Subject: Re: [RFC PATCH] tcp: introduce raw access to experimental options

On Sat, Nov 17, 2012 at 12:54 AM,  <elelueck@...ux.vnet.ibm.com> wrote:
> From: Einar Lueck <elelueck@...ux.vnet.ibm.com>
>
> This patch adds means for raw access to TCP experimental options
> 253 and 254. The intention of this is to enable user space
> applications to implement communication behaviour that depends
> on experimental options. For that, new (set|get)sockopts are

Could you elaborate on the use case? I am having a hard time
understanding that. If you need to use experimental options for your
applications, why not just use another magic number according to
draft-ietf-tcpm-experimental-options-02 (since you cite that too)?

> introduced:
>
> TCP_EXPOPTS (get & set): TCP experimental options to be added to
>                          packets
> TCP_RECV_EXPOPTS (get):  experimental options received with last
>                          packet
> TCP_RECV_SYN_EXPOPTS (get): experimental options received with
>                          SYN packet
>
> TCP experimental options 253 and 254 configured via TCP_EXPOPTS on
> any TCP socket are appended to every packet that is sent as long
> as there is enough room left. If there is not enough room left they
> are silently dropped.
>
> Listening sockets reply to SYN packets with SYN ACK packets containing
> TCP experimental options 253 and 254 as configured via TCP_EXPOPTS, too.
> If a TCP connection gets established the configured experimental options
> are the defaults for the new socket, too. Thus, a getsockopt on the
> resulting accept socket for TCP_EXPOPTS returns the same stuff configured
> on the listening socket.
>
> As mentioned above, even after the 3whs is complete, experimental options
> are sent with every packet. To enable user space applications to distinguish
> between what has been advertized via SYN and what has been received with the
> last packet the aforementioned TCP_RECV_SYN_EXPOPTS and TCP_RECV_EXPOPTS are
> introduced.
>
> Today, experimental option 253 (COOKIE) and 254 (FASTOPEN) are already
> exploited. For co-existence the following approach has been taken:
>
> General remarks:
> * Interface to COOKIE and FASTOPEN stays the same
> Sender side:
> 1. COOKIE and FASTOPEN code add their own options first (if applicable)
> 2. Finally, if enough room is left, TCP_EXPOPTS experimental options are
>    appended
> Receiver side:
> 1. ALL 253 and 254 experimental options are made available via
>    TCP_RECV(_SYN)_EXPOPTS
> 2. COOKIE and FASTOPEN code check if there is any option relevant for them
>
> References:
> http://tools.ietf.org/html/draft-ietf-tcpm-experimental-options-02
>
> Signed-off-by: Einar Lueck <elelueck@...ux.vnet.ibm.com>
> ---
>  include/linux/tcp.h      |  25 ++++++++++
>  include/net/tcp.h        |   3 ++
>  net/ipv4/tcp.c           | 110 +++++++++++++++++++++++++++++++++++++++++++
>  net/ipv4/tcp_input.c     | 119 +++++++++++++++++++++++++++++++----------------
>  net/ipv4/tcp_ipv4.c      |  14 ++++++
>  net/ipv4/tcp_minisocks.c |  17 +++++++
>  net/ipv4/tcp_output.c    |  37 ++++++++++++---
>  7 files changed, 279 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index eb125a4..b2a6451 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -110,6 +110,10 @@ enum {
>  #define TCP_REPAIR_QUEUE       20
>  #define TCP_QUEUE_SEQ          21
>  #define TCP_REPAIR_OPTIONS     22
> +#define TCP_EXPOPTS            23      /* TCP exp. options (configured) */
> +#define TCP_RECV_EXPOPTS       24      /* TCP exp. options (received) */
> +#define TCP_RECV_SYN_EXPOPTS   25      /* TCP exp. options
> +                                          (received with SYN) */
>
>  struct tcp_repair_opt {
>         __u32   opt_code;
> @@ -269,6 +273,8 @@ struct tcp_sack_block {
>  #define TCP_FACK_ENABLED  (1 << 1)   /*1 = FACK is enabled locally*/
>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
>
> +#define TCP_EXPOP_MAXLEN       40
> +
>  struct tcp_options_received {
>  /*     PAWS/RTTM data  */
>         long    ts_recent_stamp;/* Time we stored ts_recent (for aging) */
> @@ -288,6 +294,9 @@ struct tcp_options_received {
>         u8      num_sacks;      /* Number of SACK blocks                */
>         u16     user_mss;       /* mss requested by user in ioctl       */
>         u16     mss_clamp;      /* Maximal mss, negotiated at connection setup */
> +       u8      exp_opts_len;   /* length of buffer containing all exp
> +                                  options in format: kind length data */
> +       u8      exp_opts[TCP_EXPOP_MAXLEN];     /* experimental options */
>  };
>
>  static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
> @@ -295,6 +304,7 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
>         rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
>         rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
>         rx_opt->cookie_plus = 0;
> +       rx_opt->exp_opts_len = 0;
>  }
>
>  /* This is the max number of SACKS that we'll generate and process. It's safe
> @@ -315,6 +325,10 @@ struct tcp_request_sock {
>         u32                             rcv_isn;
>         u32                             snt_isn;
>         u32                             snt_synack; /* synack sent time */
> +
> +       u8 syn_expopts[TCP_EXPOP_MAXLEN];       /* experimental options
> +                                                  received with SYN */
> +       u8 syn_expopts_len;
>  };
>
>  static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> @@ -406,6 +420,17 @@ struct tcp_sock {
>         u32     snd_up;         /* Urgent pointer               */
>
>         u8      keepalive_probes; /* num of allowed keep alive probes   */
> +
> +       /* for raw access to experimental options */
> +       struct {
> +               u8 *conf;       /* lazy allocation of TCP_EXPOP_MAXLEN bytes
> +                                  for raw access to experimental options */
> +               u8 conf_len;    /* bytes actually used for experimental opts */
> +               u8 *syn;        /* experimental options received with SYN,
> +                                  allocated only if received */
> +               u8 syn_len;     /* bytes of experimental options actually
> +                                  received with SYN */
> +       } exp_opts;
>  /*
>   *      Options received (usually on last packet, some only on SYN packets).
>   */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 1f000ff..b63d5c9 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -170,6 +170,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
>  #define TCPOPT_TIMESTAMP       8       /* Better RTT estimations/PAWS */
>  #define TCPOPT_MD5SIG          19      /* MD5 Signature (RFC2385) */
>  #define TCPOPT_COOKIE          253     /* Cookie extension (experimental) */
> +#define TCPOPT_EXP253          253     /* TCP experimental option 253 */
> +#define TCPOPT_EXP254          254     /* TCP experimental option 254 */
>  #define TCPOPT_EXP             254     /* Experimental */
>  /* Magic number to be after the option value for sharing TCP
>   * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
> @@ -180,6 +182,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
>   *     TCP option lengths
>   */
>
> +#define TCPOLEN_MAX_ANYEXP     40
>  #define TCPOLEN_MSS            4
>  #define TCPOLEN_WINDOW         3
>  #define TCPOLEN_SACK_PERM      2
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 5f64193..e7e4947 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -423,6 +423,12 @@ void tcp_init_sock(struct sock *sk)
>         sk->sk_sndbuf = sysctl_tcp_wmem[1];
>         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
>
> +       /* memory for raw access to experimental options is allocated lazily */
> +       tp->exp_opts.conf = NULL;
> +       tp->exp_opts.conf_len = 0;
> +       tp->exp_opts.syn = NULL;
> +       tp->exp_opts.syn_len = 0;
> +
>         local_bh_disable();
>         sock_update_memcg(sk);
>         sk_sockets_allocated_inc(sk);
> @@ -2376,6 +2382,53 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>
>         /* These are data/string values, all the others are ints */
>         switch (optname) {
> +       case TCP_EXPOPTS: {
> +               u8 conf[TCP_EXPOP_MAXLEN];
> +
> +               if (optlen > TCP_EXPOP_MAXLEN || (optlen < 4 && optlen > 0) ||
> +                   (optlen % 4 > 0))
> +                       return -EINVAL;
> +               if (optlen > 0 && !optval)
> +                       return -EINVAL;
> +
> +               /* filter for raw access to supported options */
> +               if (optlen) {
> +                       u8 i;
> +
> +                       if (copy_from_user(conf, optval, optlen))
> +                               return -EFAULT;
> +
> +                       i = 0;
> +                       while (i < optlen) {
> +                               if (conf[i] != TCPOPT_EXP253 &&
> +                                   conf[i] != TCPOPT_EXP254)
> +                                       return -EINVAL;
> +
> +                               if (i + 1 < optlen) {
> +                                       i += conf[i+1];
> +                                       if (i > optlen)
> +                                               return -EINVAL;
> +                               } else {
> +                                       return -EINVAL;
> +                               }
> +                       }
> +               }
> +
> +               lock_sock(sk);
> +               if (!optlen) {
> +                       tp->exp_opts.conf_len = 0;
> +                       release_sock(sk);
> +                       return 0;
> +               }
> +               if (!tp->exp_opts.conf) {
> +                       tp->exp_opts.conf = kzalloc(TCP_EXPOP_MAXLEN,
> +                                                   sk->sk_allocation);
> +               }
> +               memcpy(tp->exp_opts.conf, conf, optlen);
> +               tp->exp_opts.conf_len = optlen;
> +               release_sock(sk);
> +               return err;
> +       }
>         case TCP_CONGESTION: {
>                 char name[TCP_CA_NAME_MAX];
>
> @@ -2947,6 +3000,63 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
>         case TCP_USER_TIMEOUT:
>                 val = jiffies_to_msecs(icsk->icsk_user_timeout);
>                 break;
> +       case TCP_EXPOPTS: {
> +               u8 exp_opts_len;
> +
> +               if (get_user(len, optlen))
> +                       return -EFAULT;
> +               if (len < 0)
> +                       return -EINVAL;
> +
> +               exp_opts_len = tp->exp_opts.conf_len;
> +
> +               if (exp_opts_len > len)
> +                       return -EINVAL;
> +               if (put_user(exp_opts_len, optlen))
> +                       return -EFAULT;
> +               if (exp_opts_len && copy_to_user(optval, tp->exp_opts.conf,
> +                                                exp_opts_len))
> +                       return -EFAULT;
> +               return 0;
> +       }
> +       case TCP_RECV_EXPOPTS:
> +               if (get_user(len, optlen))
> +                       return -EFAULT;
> +               if (len < 0)
> +                       return -EINVAL;
> +
> +               if (len < tp->rx_opt.exp_opts_len)
> +                       return -EINVAL;
> +
> +               if (put_user(tp->rx_opt.exp_opts_len, optlen))
> +                       return -EFAULT;
> +               if (copy_to_user(optval, tp->rx_opt.exp_opts,
> +                                tp->rx_opt.exp_opts_len))
> +                       return -EFAULT;
> +               return 0;
> +       case TCP_RECV_SYN_EXPOPTS: {
> +               u8 exp_opts_len;
> +
> +               if (get_user(len, optlen))
> +                       return -EFAULT;
> +               if (len < 0)
> +                       return -EINVAL;
> +
> +               if (!tp->exp_opts.syn)
> +                       exp_opts_len = 0;
> +               else
> +                       exp_opts_len = tp->exp_opts.syn_len;
> +
> +               if (exp_opts_len > len)
> +                       return -EINVAL;
> +               if (put_user(exp_opts_len, optlen))
> +                       return -EFAULT;
> +               if (exp_opts_len && copy_to_user(optval, tp->exp_opts.syn,
> +                                                exp_opts_len)) {
> +                       return -EFAULT;
> +               }
> +               return 0;
> +       }
>         default:
>                 return -ENOPROTOOPT;
>         }
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index d377f48..130d4f4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3726,11 +3726,32 @@ old_ack:
>         return 0;
>  }
>
> +static inline void tcp_parse_fastopen_cookie(int opcode,
> +               int opsize,
> +               const unsigned char *ptr,
> +               struct tcp_fastopen_cookie *foc,
> +               const struct tcphdr *th) {
> +       /* Fast Open option shares code 254 using a 16 bits magic number. It's
> +        * valid only in SYN or SYN-ACK with an even size.
> +        */
> +       if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
> +           get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || foc == NULL ||
> +           !th->syn || (opsize & 1))
> +               return;
> +       foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
> +       if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
> +           foc->len <= TCP_FASTOPEN_COOKIE_MAX)
> +               memcpy(foc->val, ptr + 2, foc->len);
> +       else if (foc->len != 0)
> +               foc->len = -1;
> +}
> +
>  /* Look for tcp options. Normally only called on SYN and SYNACK packets.
>   * But, this can also be called on packets in the established flow when
>   * the fast version below fails.
>   */
> -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
> +void tcp_parse_options(const struct sk_buff *skb,
> +                      struct tcp_options_received *opt_rx,
>                        const u8 **hvpp, int estab,
>                        struct tcp_fastopen_cookie *foc)
>  {
> @@ -3740,6 +3761,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
>
>         ptr = (const unsigned char *)(th + 1);
>         opt_rx->saw_tstamp = 0;
> +       opt_rx->exp_opts_len = 0;
>
>         while (length > 0) {
>                 int opcode = *ptr++;
> @@ -3815,48 +3837,56 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
>                                  */
>                                 break;
>  #endif
> -                       case TCPOPT_COOKIE:
> -                               /* This option is variable length.
> +                       case TCPOPT_EXP253:
> +                       case TCPOPT_EXP254:
> +                               /* First parse options into raw access area for
> +                                * experimental options. Then handle
> +                                * potential exploitations
>                                  */
> -                               switch (opsize) {
> -                               case TCPOLEN_COOKIE_BASE:
> -                                       /* not yet implemented */
> -                                       break;
> -                               case TCPOLEN_COOKIE_PAIR:
> -                                       /* not yet implemented */
> -                                       break;
> -                               case TCPOLEN_COOKIE_MIN+0:
> -                               case TCPOLEN_COOKIE_MIN+2:
> -                               case TCPOLEN_COOKIE_MIN+4:
> -                               case TCPOLEN_COOKIE_MIN+6:
> -                               case TCPOLEN_COOKIE_MAX:
> -                                       /* 16-bit multiple */
> -                                       opt_rx->cookie_plus = opsize;
> -                                       *hvpp = ptr;
> -                                       break;
> -                               default:
> -                                       /* ignore option */
> -                                       break;
> +                               if (opsize <= TCPOLEN_MAX_ANYEXP &&
> +                                   opsize >= 2 &&
> +                                   (opt_rx->exp_opts_len + opsize <=
> +                                    TCPOLEN_MAX_ANYEXP)) {
> +                                       opt_rx->exp_opts[
> +                                               opt_rx->exp_opts_len] = opcode;
> +                                       opt_rx->exp_opts[
> +                                               opt_rx->exp_opts_len + 1] =
> +                                               opsize;
> +                                       memcpy(opt_rx->exp_opts +
> +                                               opt_rx->exp_opts_len + 2, ptr,
> +                                               opsize - 2);
> +                                       opt_rx->exp_opts_len += opsize;
>                                 }
> -                               break;
>
> -                       case TCPOPT_EXP:
> -                               /* Fast Open option shares code 254 using a
> -                                * 16 bits magic number. It's valid only in
> -                                * SYN or SYN-ACK with an even size.
> -                                */
> -                               if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
> -                                   get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
> -                                   foc == NULL || !th->syn || (opsize & 1))
> -                                       break;
> -                               foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
> -                               if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
> -                                   foc->len <= TCP_FASTOPEN_COOKIE_MAX)
> -                                       memcpy(foc->val, ptr + 2, foc->len);
> -                               else if (foc->len != 0)
> -                                       foc->len = -1;
> +                               /* handle potential exploitations */
> +                               if (opcode == TCPOPT_COOKIE) {
> +                                       /* This option is variable length. */
> +                                       switch (opsize) {
> +                                       case TCPOLEN_COOKIE_BASE:
> +                                               /* not yet implemented */
> +                                               break;
> +                                       case TCPOLEN_COOKIE_PAIR:
> +                                               /* not yet implemented */
> +                                               break;
> +                                       case TCPOLEN_COOKIE_MIN+0:
> +                                       case TCPOLEN_COOKIE_MIN+2:
> +                                       case TCPOLEN_COOKIE_MIN+4:
> +                                       case TCPOLEN_COOKIE_MIN+6:
> +                                       case TCPOLEN_COOKIE_MAX:
> +                                               /* 16-bit multiple */
> +                                               opt_rx->cookie_plus = opsize;
> +                                               *hvpp = ptr;
> +                                               break;
> +                                       default:
> +                                               /* ignore option */
> +                                               break;
> +                                       }
> +                               } else {
> +                                       tcp_parse_fastopen_cookie(opcode,
> +                                                                 opsize, ptr,
> +                                                                 foc, th);
> +                               }
>                                 break;
> -
>                         }
>                         ptr += opsize-2;
>                         length -= opsize;
> @@ -3888,6 +3918,9 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
>                                    const struct tcphdr *th,
>                                    struct tcp_sock *tp, const u8 **hvpp)
>  {
> +       /* required if exp options are not used anymore by the counterpart */
> +       tp->rx_opt.exp_opts_len = 0;
> +
>         /* In the spirit of fast parsing, compare doff directly to constant
>          * values.  Because equality is used, short doff can be ignored here.
>          */
> @@ -5806,6 +5839,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
>                         }
>                 }
>
> +               if (unlikely(tp->rx_opt.exp_opts_len > 0)) {
> +                       tp->exp_opts.syn = kzalloc(tp->rx_opt.exp_opts_len,
> +                                                  sk->sk_allocation);
> +                       tp->exp_opts.syn_len = tp->rx_opt.exp_opts_len;
> +                       memcpy(tp->exp_opts.syn, &tp->rx_opt.exp_opts,
> +                              tp->rx_opt.exp_opts_len);
> +               }
> +
>                 smp_mb();
>
>                 tcp_finish_connect(sk, skb);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 00a748d..2f66bd5 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1321,6 +1321,16 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>         tmp_opt.user_mss  = tp->rx_opt.user_mss;
>         tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
>
> +       /* for raw access to experimental options in SYN packet */
> +       tcp_rsk(req)->syn_expopts_len = tmp_opt.exp_opts_len;
> +       if (tcp_rsk(req)->syn_expopts_len) {
> +               /* transport experimental options via request socket to big
> +                * socket
> +                */
> +               memcpy(tcp_rsk(req)->syn_expopts, tmp_opt.exp_opts,
> +                      tcp_rsk(req)->syn_expopts_len);
> +       }
> +
>         if (tmp_opt.cookie_plus > 0 &&
>             tmp_opt.saw_tstamp &&
>             !tp->rx_opt.cookie_out_never &&
> @@ -1978,6 +1988,10 @@ void tcp_v4_destroy_sock(struct sock *sk)
>                 tp->cookie_values = NULL;
>         }
>
> +       /* buffers for raw access to experimental options */
> +       kfree(tp->exp_opts.conf);
> +       kfree(tp->exp_opts.syn);
> +
>         /* If socket is aborted during connect operation */
>         tcp_free_fastopen_req(tp);
>
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 6ff7f10..dc25875 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -466,6 +466,23 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
>
>                 newtp->urg_data = 0;
>
> +               if (tcp_rsk(req)->syn_expopts_len) {
> +                       newtp->exp_opts.syn_len =
> +                                       tcp_rsk(req)->syn_expopts_len;
> +                       newtp->exp_opts.syn = kzalloc(newtp->exp_opts.syn_len,
> +                                                     GFP_ATOMIC);
> +                       memcpy(newtp->exp_opts.syn, tcp_rsk(req)->syn_expopts,
> +                              newtp->exp_opts.syn_len);
> +               }
> +
> +               if (oldtp->exp_opts.conf_len > 0) {
> +                       newtp->exp_opts.conf_len = oldtp->exp_opts.conf_len;
> +                       newtp->exp_opts.conf = kzalloc(TCP_EXPOP_MAXLEN,
> +                                                      GFP_ATOMIC);
> +                       memcpy(newtp->exp_opts.conf, oldtp->exp_opts.conf,
> +                              oldtp->exp_opts.conf_len);
> +               }
> +
>                 if (sock_flag(newsk, SOCK_KEEPOPEN))
>                         inet_csk_reset_keepalive_timer(newsk,
>                                                        keepalive_time_when(newtp));
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index d046326..8d7cf51 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -385,6 +385,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
>  #define OPTION_MD5             (1 << 2)
>  #define OPTION_WSCALE          (1 << 3)
>  #define OPTION_COOKIE_EXTENSION        (1 << 4)
> +#define OPTION_EXP             (1 << 5)
>  #define OPTION_FAST_OPEN_COOKIE        (1 << 8)
>
>  struct tcp_out_options {
> @@ -581,6 +582,12 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
>                 }
>                 ptr += (foc->len + 3) >> 2;
>         }
> +       if (unlikely(OPTION_EXP & options && tp->exp_opts.conf_len > 0)) {
> +               __u8 *p = (__u8 *) ptr;
> +               memcpy(ptr, tp->exp_opts.conf, tp->exp_opts.conf_len);
> +               p += tp->exp_opts.conf_len;
> +               ptr = (__be32 *) p;
> +       }
>  }
>
>  /* Compute TCP options for SYN packets. This is not the final
> @@ -693,6 +700,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
>                         remaining -= need;
>                 }
>         }
> +       if (unlikely(tp->exp_opts.conf_len > 0 &&
> +                    tp->exp_opts.conf_len <= remaining)) {
> +               opts->options |= OPTION_EXP;
> +               remaining -= tp->exp_opts.conf_len;
> +       }
>         return MAX_TCP_OPTION_SPACE - remaining;
>  }
>
> @@ -747,6 +759,11 @@ static unsigned int tcp_synack_options(struct sock *sk,
>                 if (unlikely(!ireq->tstamp_ok))
>                         remaining -= TCPOLEN_SACKPERM_ALIGNED;
>         }
> +       if (unlikely(tcp_sk(sk)->exp_opts.conf_len > 0 &&
> +                    tcp_sk(sk)->exp_opts.conf_len <= remaining)) {
> +               opts->options |= OPTION_EXP;
> +               remaining -= tcp_sk(sk)->exp_opts.conf_len;
> +       }
>
>         /* Similar rationale to tcp_syn_options() applies here, too.
>          * If the <SYN> options fit, the same options should fit now!
> @@ -782,38 +799,44 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
>  {
>         struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
>         struct tcp_sock *tp = tcp_sk(sk);
> -       unsigned int size = 0;
> +       unsigned remaining = MAX_TCP_OPTION_SPACE;
>         unsigned int eff_sacks;
>
>  #ifdef CONFIG_TCP_MD5SIG
>         *md5 = tp->af_specific->md5_lookup(sk, sk);
>         if (unlikely(*md5)) {
>                 opts->options |= OPTION_MD5;
> -               size += TCPOLEN_MD5SIG_ALIGNED;
> +               remaining -= TCPOLEN_MD5SIG_ALIGNED;
>         }
>  #else
>         *md5 = NULL;
>  #endif
>
> -       if (likely(tp->rx_opt.tstamp_ok)) {
> +       if (likely(tp->rx_opt.tstamp_ok &&
> +                  remaining >= TCPOLEN_TSTAMP_ALIGNED)) {
>                 opts->options |= OPTION_TS;
>                 opts->tsval = tcb ? tcb->when : 0;
>                 opts->tsecr = tp->rx_opt.ts_recent;
> -               size += TCPOLEN_TSTAMP_ALIGNED;
> +               remaining -= TCPOLEN_TSTAMP_ALIGNED;
>         }
>
>         eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
>         if (unlikely(eff_sacks)) {
> -               const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
>                 opts->num_sack_blocks =
>                         min_t(unsigned int, eff_sacks,
>                               (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
>                               TCPOLEN_SACK_PERBLOCK);
> -               size += TCPOLEN_SACK_BASE_ALIGNED +
> +               remaining -= TCPOLEN_SACK_BASE_ALIGNED +
>                         opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
>         }
>
> -       return size;
> +       if (unlikely(tp->exp_opts.conf_len > 0 &&
> +                    tp->exp_opts.conf_len <= remaining)) {
> +               opts->options |= OPTION_EXP;
> +               remaining -= tp->exp_opts.conf_len;
> +       }
> +
> +       return MAX_TCP_OPTION_SPACE - remaining;
>  }
>
>
> --
> 1.7.12.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ