lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1370585517.4021.94.camel@deadeye.wl.decadent.org.uk>
Date:	Fri, 07 Jun 2013 07:11:57 +0100
From:	Ben Hutchings <ben@...adent.org.uk>
To:	Willy Tarreau <w@....eu>
Cc:	linux-kernel@...r.kernel.org, stable@...r.kernel.org,
	Eric Dumazet <eric.dumazet@...il.com>,
	Herbert Xu <herbert@...dor.hengli.com.au>,
	"David S. Miller" <davem@...emloft.net>
Subject: Re: [ 157/184] inet: add RCU protection to inet->opt

On Tue, 2013-06-04 at 19:24 +0200, Willy Tarreau wrote:
> 2.6.32-longterm review patch.  If anyone has any objections, please let me know.
> 
> ------------------
> 
> From: Eric Dumazet <eric.dumazet@...il.com>
> 
> commit f6d8bd051c391c1c0458a30b2a7abcd939329259 upstream.
> 
> We lack proper synchronization to manipulate inet->opt ip_options.
> 
> The problem is that ip_make_skb() calls ip_setup_cork(), and
> ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options)
> without any protection against another thread manipulating inet->opt.
> 
> Another thread can change the inet->opt pointer and free the old one under us.
> 
> Use RCU to protect inet->opt (changed to inet->inet_opt).
> 
> Instead of handling atomic refcounts, just copy ip_options when
> necessary, to avoid cache line dirtying.
> 
> We can't insert an rcu_head in struct ip_options since it's included in
> skb->cb[], so this patch is large because I had to introduce a new
> ip_options_rcu structure.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
> Cc: Herbert Xu <herbert@...dor.apana.org.au>
> Signed-off-by: David S. Miller <davem@...emloft.net>
> [dannf/bwh: backported to Debian's 2.6.32]

Signed-off-by: Ben Hutchings <ben@...adent.org.uk>

> Signed-off-by: Willy Tarreau <w@....eu>
> ---
>  include/net/inet_sock.h         |  14 +++--
>  include/net/ip.h                |  11 ++--
>  net/dccp/ipv4.c                 |  15 +++---
>  net/dccp/ipv6.c                 |   2 +-
>  net/ipv4/af_inet.c              |  16 ++++--
>  net/ipv4/cipso_ipv4.c           | 113 ++++++++++++++++++++++------------------
>  net/ipv4/icmp.c                 |  23 ++++----
>  net/ipv4/inet_connection_sock.c |   8 +--
>  net/ipv4/ip_options.c           |  38 +++++++-------
>  net/ipv4/ip_output.c            |  50 +++++++++---------
>  net/ipv4/ip_sockglue.c          |  33 ++++++++----
>  net/ipv4/raw.c                  |  19 +++++--
>  net/ipv4/syncookies.c           |   4 +-
>  net/ipv4/tcp_ipv4.c             |  33 +++++++-----
>  net/ipv4/udp.c                  |  21 ++++++--
>  net/ipv6/tcp_ipv6.c             |   2 +-
>  16 files changed, 235 insertions(+), 167 deletions(-)
> 
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 47004f3..cf65e77 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -56,7 +56,15 @@ struct ip_options {
>  	unsigned char	__data[0];
>  };
>  
> -#define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
> +struct ip_options_rcu {
> +	struct rcu_head rcu;
> +	struct ip_options opt;
> +};
> +
> +struct ip_options_data {
> +	struct ip_options_rcu	opt;
> +	char			data[40];
> +};
>  
>  struct inet_request_sock {
>  	struct request_sock	req;
> @@ -77,7 +85,7 @@ struct inet_request_sock {
>  				acked	   : 1,
>  				no_srccheck: 1;
>  	kmemcheck_bitfield_end(flags);
> -	struct ip_options	*opt;
> +	struct ip_options_rcu	*opt;
>  };
>  
>  static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
> @@ -122,7 +130,7 @@ struct inet_sock {
>  	__be32			saddr;
>  	__s16			uc_ttl;
>  	__u16			cmsg_flags;
> -	struct ip_options	*opt;
> +	struct ip_options_rcu	*inet_opt;
>  	__be16			sport;
>  	__u16			id;
>  	__u8			tos;
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 69db943..a7d4675 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -54,7 +54,7 @@ struct ipcm_cookie
>  {
>  	__be32			addr;
>  	int			oif;
> -	struct ip_options	*opt;
> +	struct ip_options_rcu	*opt;
>  	union skb_shared_tx	shtx;
>  };
>  
> @@ -92,7 +92,7 @@ extern int		igmp_mc_proc_init(void);
>  
>  extern int		ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
>  					      __be32 saddr, __be32 daddr,
> -					      struct ip_options *opt);
> +					      struct ip_options_rcu *opt);
>  extern int		ip_rcv(struct sk_buff *skb, struct net_device *dev,
>  			       struct packet_type *pt, struct net_device *orig_dev);
>  extern int		ip_local_deliver(struct sk_buff *skb);
> @@ -362,14 +362,15 @@ extern int ip_forward(struct sk_buff *skb);
>   *	Functions provided by ip_options.c
>   */
>   
> -extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag);
> +extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
> +			     __be32 daddr, struct rtable *rt, int is_frag);
>  extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
>  extern void ip_options_fragment(struct sk_buff *skb);
>  extern int ip_options_compile(struct net *net,
>  			      struct ip_options *opt, struct sk_buff *skb);
> -extern int ip_options_get(struct net *net, struct ip_options **optp,
> +extern int ip_options_get(struct net *net, struct ip_options_rcu **optp,
>  			  unsigned char *data, int optlen);
> -extern int ip_options_get_from_user(struct net *net, struct ip_options **optp,
> +extern int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
>  				    unsigned char __user *data, int optlen);
>  extern void ip_options_undo(struct ip_options * opt);
>  extern void ip_forward_options(struct sk_buff *skb);
> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
> index d14c0a3..cef3656 100644
> --- a/net/dccp/ipv4.c
> +++ b/net/dccp/ipv4.c
> @@ -47,6 +47,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  	__be32 daddr, nexthop;
>  	int tmp;
>  	int err;
> +	struct ip_options_rcu *inet_opt;
>  
>  	dp->dccps_role = DCCP_ROLE_CLIENT;
>  
> @@ -57,10 +58,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  		return -EAFNOSUPPORT;
>  
>  	nexthop = daddr = usin->sin_addr.s_addr;
> -	if (inet->opt != NULL && inet->opt->srr) {
> +
> +	inet_opt = inet->inet_opt;
> +	if (inet_opt != NULL && inet_opt->opt.srr) {
>  		if (daddr == 0)
>  			return -EINVAL;
> -		nexthop = inet->opt->faddr;
> +		nexthop = inet_opt->opt.faddr;
>  	}
>  
>  	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
> @@ -75,7 +78,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  		return -ENETUNREACH;
>  	}
>  
> -	if (inet->opt == NULL || !inet->opt->srr)
> +	if (inet_opt == NULL || !inet_opt->opt.srr)
>  		daddr = rt->rt_dst;
>  
>  	if (inet->saddr == 0)
> @@ -86,8 +89,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  	inet->daddr = daddr;
>  
>  	inet_csk(sk)->icsk_ext_hdr_len = 0;
> -	if (inet->opt != NULL)
> -		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
> +	if (inet_opt)
> +		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
>  	/*
>  	 * Socket identity is still unknown (sport may be zero).
>  	 * However we set state to DCCP_REQUESTING and not releasing socket
> @@ -397,7 +400,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
>  	newinet->daddr	   = ireq->rmt_addr;
>  	newinet->rcv_saddr = ireq->loc_addr;
>  	newinet->saddr	   = ireq->loc_addr;
> -	newinet->opt	   = ireq->opt;
> +	newinet->inet_opt	= ireq->opt;
>  	ireq->opt	   = NULL;
>  	newinet->mc_index  = inet_iif(skb);
>  	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
> diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
> index 9ed1962..2f11de7 100644
> --- a/net/dccp/ipv6.c
> +++ b/net/dccp/ipv6.c
> @@ -600,7 +600,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
>  
>  	   First: no IPv4 options.
>  	 */
> -	newinet->opt = NULL;
> +	newinet->inet_opt = NULL;
>  
>  	/* Clone RX bits */
>  	newnp->rxopt.all = np->rxopt.all;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index a289878..d1992a4 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -152,7 +152,7 @@ void inet_sock_destruct(struct sock *sk)
>  	WARN_ON(sk->sk_wmem_queued);
>  	WARN_ON(sk->sk_forward_alloc);
>  
> -	kfree(inet->opt);
> +	kfree(inet->inet_opt);
>  	dst_release(sk->sk_dst_cache);
>  	sk_refcnt_debug_dec(sk);
>  }
> @@ -1065,9 +1065,11 @@ static int inet_sk_reselect_saddr(struct sock *sk)
>  	__be32 old_saddr = inet->saddr;
>  	__be32 new_saddr;
>  	__be32 daddr = inet->daddr;
> +	struct ip_options_rcu *inet_opt;
>  
> -	if (inet->opt && inet->opt->srr)
> -		daddr = inet->opt->faddr;
> +	inet_opt = inet->inet_opt;
> +	if (inet_opt && inet_opt->opt.srr)
> +		daddr = inet_opt->opt.faddr;
>  
>  	/* Query new route. */
>  	err = ip_route_connect(&rt, daddr, 0,
> @@ -1109,6 +1111,7 @@ int inet_sk_rebuild_header(struct sock *sk)
>  	struct inet_sock *inet = inet_sk(sk);
>  	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
>  	__be32 daddr;
> +	struct ip_options_rcu *inet_opt;
>  	int err;
>  
>  	/* Route is OK, nothing to do. */
> @@ -1116,9 +1119,12 @@ int inet_sk_rebuild_header(struct sock *sk)
>  		return 0;
>  
>  	/* Reroute. */
> +	rcu_read_lock();
> +	inet_opt = rcu_dereference(inet->inet_opt);
>  	daddr = inet->daddr;
> -	if (inet->opt && inet->opt->srr)
> -		daddr = inet->opt->faddr;
> +	if (inet_opt && inet_opt->opt.srr)
> +		daddr = inet_opt->opt.faddr;
> +	rcu_read_unlock();
>  {
>  	struct flowi fl = {
>  		.oif = sk->sk_bound_dev_if,
> diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
> index 10f8f8d..b6d06d6 100644
> --- a/net/ipv4/cipso_ipv4.c
> +++ b/net/ipv4/cipso_ipv4.c
> @@ -1860,6 +1860,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
>  	return CIPSO_V4_HDR_LEN + ret_val;
>  }
>  
> +static void opt_kfree_rcu(struct rcu_head *head)
> +{
> +	kfree(container_of(head, struct ip_options_rcu, rcu));
> +}
> +
>  /**
>   * cipso_v4_sock_setattr - Add a CIPSO option to a socket
>   * @sk: the socket
> @@ -1882,7 +1887,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
>  	unsigned char *buf = NULL;
>  	u32 buf_len;
>  	u32 opt_len;
> -	struct ip_options *opt = NULL;
> +	struct ip_options_rcu *old, *opt = NULL;
>  	struct inet_sock *sk_inet;
>  	struct inet_connection_sock *sk_conn;
>  
> @@ -1918,22 +1923,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
>  		ret_val = -ENOMEM;
>  		goto socket_setattr_failure;
>  	}
> -	memcpy(opt->__data, buf, buf_len);
> -	opt->optlen = opt_len;
> -	opt->cipso = sizeof(struct iphdr);
> +	memcpy(opt->opt.__data, buf, buf_len);
> +	opt->opt.optlen = opt_len;
> +	opt->opt.cipso = sizeof(struct iphdr);
>  	kfree(buf);
>  	buf = NULL;
>  
>  	sk_inet = inet_sk(sk);
> +
> +	old = sk_inet->inet_opt;
>  	if (sk_inet->is_icsk) {
>  		sk_conn = inet_csk(sk);
> -		if (sk_inet->opt)
> -			sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
> -		sk_conn->icsk_ext_hdr_len += opt->optlen;
> +		if (old)
> +			sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
> +		sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
>  		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
>  	}
> -	opt = xchg(&sk_inet->opt, opt);
> -	kfree(opt);
> +	rcu_assign_pointer(sk_inet->inet_opt, opt);
> +	if (old)
> +		call_rcu(&old->rcu, opt_kfree_rcu);
>  
>  	return 0;
>  
> @@ -1963,7 +1971,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
>  	unsigned char *buf = NULL;
>  	u32 buf_len;
>  	u32 opt_len;
> -	struct ip_options *opt = NULL;
> +	struct ip_options_rcu *opt = NULL;
>  	struct inet_request_sock *req_inet;
>  
>  	/* We allocate the maximum CIPSO option size here so we are probably
> @@ -1991,15 +1999,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
>  		ret_val = -ENOMEM;
>  		goto req_setattr_failure;
>  	}
> -	memcpy(opt->__data, buf, buf_len);
> -	opt->optlen = opt_len;
> -	opt->cipso = sizeof(struct iphdr);
> +	memcpy(opt->opt.__data, buf, buf_len);
> +	opt->opt.optlen = opt_len;
> +	opt->opt.cipso = sizeof(struct iphdr);
>  	kfree(buf);
>  	buf = NULL;
>  
>  	req_inet = inet_rsk(req);
>  	opt = xchg(&req_inet->opt, opt);
> -	kfree(opt);
> +	if (opt)
> +		call_rcu(&opt->rcu, opt_kfree_rcu);
>  
>  	return 0;
>  
> @@ -2019,34 +2028,34 @@ req_setattr_failure:
>   * values on failure.
>   *
>   */
> -int cipso_v4_delopt(struct ip_options **opt_ptr)
> +int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
>  {
>  	int hdr_delta = 0;
> -	struct ip_options *opt = *opt_ptr;
> +	struct ip_options_rcu *opt = *opt_ptr;
>  
> -	if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
> +	if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
>  		u8 cipso_len;
>  		u8 cipso_off;
>  		unsigned char *cipso_ptr;
>  		int iter;
>  		int optlen_new;
>  
> -		cipso_off = opt->cipso - sizeof(struct iphdr);
> -		cipso_ptr = &opt->__data[cipso_off];
> +		cipso_off = opt->opt.cipso - sizeof(struct iphdr);
> +		cipso_ptr = &opt->opt.__data[cipso_off];
>  		cipso_len = cipso_ptr[1];
>  
> -		if (opt->srr > opt->cipso)
> -			opt->srr -= cipso_len;
> -		if (opt->rr > opt->cipso)
> -			opt->rr -= cipso_len;
> -		if (opt->ts > opt->cipso)
> -			opt->ts -= cipso_len;
> -		if (opt->router_alert > opt->cipso)
> -			opt->router_alert -= cipso_len;
> -		opt->cipso = 0;
> +		if (opt->opt.srr > opt->opt.cipso)
> +			opt->opt.srr -= cipso_len;
> +		if (opt->opt.rr > opt->opt.cipso)
> +			opt->opt.rr -= cipso_len;
> +		if (opt->opt.ts > opt->opt.cipso)
> +			opt->opt.ts -= cipso_len;
> +		if (opt->opt.router_alert > opt->opt.cipso)
> +			opt->opt.router_alert -= cipso_len;
> +		opt->opt.cipso = 0;
>  
>  		memmove(cipso_ptr, cipso_ptr + cipso_len,
> -			opt->optlen - cipso_off - cipso_len);
> +			opt->opt.optlen - cipso_off - cipso_len);
>  
>  		/* determining the new total option length is tricky because of
>  		 * the padding necessary, the only thing i can think to do at
> @@ -2055,21 +2064,21 @@ int cipso_v4_delopt(struct ip_options **opt_ptr)
>  		 * from there we can determine the new total option length */
>  		iter = 0;
>  		optlen_new = 0;
> -		while (iter < opt->optlen)
> -			if (opt->__data[iter] != IPOPT_NOP) {
> -				iter += opt->__data[iter + 1];
> +		while (iter < opt->opt.optlen)
> +			if (opt->opt.__data[iter] != IPOPT_NOP) {
> +				iter += opt->opt.__data[iter + 1];
>  				optlen_new = iter;
>  			} else
>  				iter++;
> -		hdr_delta = opt->optlen;
> -		opt->optlen = (optlen_new + 3) & ~3;
> -		hdr_delta -= opt->optlen;
> +		hdr_delta = opt->opt.optlen;
> +		opt->opt.optlen = (optlen_new + 3) & ~3;
> +		hdr_delta -= opt->opt.optlen;
>  	} else {
>  		/* only the cipso option was present on the socket so we can
>  		 * remove the entire option struct */
>  		*opt_ptr = NULL;
> -		hdr_delta = opt->optlen;
> -		kfree(opt);
> +		hdr_delta = opt->opt.optlen;
> +		call_rcu(&opt->rcu, opt_kfree_rcu);
>  	}
>  
>  	return hdr_delta;
> @@ -2086,15 +2095,15 @@ int cipso_v4_delopt(struct ip_options **opt_ptr)
>  void cipso_v4_sock_delattr(struct sock *sk)
>  {
>  	int hdr_delta;
> -	struct ip_options *opt;
> +	struct ip_options_rcu *opt;
>  	struct inet_sock *sk_inet;
>  
>  	sk_inet = inet_sk(sk);
> -	opt = sk_inet->opt;
> -	if (opt == NULL || opt->cipso == 0)
> +	opt = sk_inet->inet_opt;
> +	if (opt == NULL || opt->opt.cipso == 0)
>  		return;
>  
> -	hdr_delta = cipso_v4_delopt(&sk_inet->opt);
> +	hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
>  	if (sk_inet->is_icsk && hdr_delta > 0) {
>  		struct inet_connection_sock *sk_conn = inet_csk(sk);
>  		sk_conn->icsk_ext_hdr_len -= hdr_delta;
> @@ -2112,12 +2121,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
>   */
>  void cipso_v4_req_delattr(struct request_sock *req)
>  {
> -	struct ip_options *opt;
> +	struct ip_options_rcu *opt;
>  	struct inet_request_sock *req_inet;
>  
>  	req_inet = inet_rsk(req);
>  	opt = req_inet->opt;
> -	if (opt == NULL || opt->cipso == 0)
> +	if (opt == NULL || opt->opt.cipso == 0)
>  		return;
>  
>  	cipso_v4_delopt(&req_inet->opt);
> @@ -2187,14 +2196,18 @@ getattr_return:
>   */
>  int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
>  {
> -	struct ip_options *opt;
> +	struct ip_options_rcu *opt;
> +	int res = -ENOMSG;
>  
> -	opt = inet_sk(sk)->opt;
> -	if (opt == NULL || opt->cipso == 0)
> -		return -ENOMSG;
> -
> -	return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr),
> -				secattr);
> +	rcu_read_lock();
> +	opt = rcu_dereference(inet_sk(sk)->inet_opt);
> +	if (opt && opt->opt.cipso)
> +		res = cipso_v4_getattr(opt->opt.__data +
> +						opt->opt.cipso -
> +						sizeof(struct iphdr),
> +				       secattr);
> +	rcu_read_unlock();
> +	return res;
>  }
>  
>  /**
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 5bc13fe..859d781 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -107,8 +107,7 @@ struct icmp_bxm {
>  		__be32	       times[3];
>  	} data;
>  	int head_len;
> -	struct ip_options replyopts;
> -	unsigned char  optbuf[40];
> +	struct ip_options_data replyopts;
>  };
>  
>  /* An array of errno for error messages from dest unreach. */
> @@ -362,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>  	struct inet_sock *inet;
>  	__be32 daddr;
>  
> -	if (ip_options_echo(&icmp_param->replyopts, skb))
> +	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
>  		return;
>  
>  	sk = icmp_xmit_lock(net);
> @@ -376,10 +375,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>  	daddr = ipc.addr = rt->rt_src;
>  	ipc.opt = NULL;
>  	ipc.shtx.flags = 0;
> -	if (icmp_param->replyopts.optlen) {
> -		ipc.opt = &icmp_param->replyopts;
> -		if (ipc.opt->srr)
> -			daddr = icmp_param->replyopts.faddr;
> +	if (icmp_param->replyopts.opt.opt.optlen) {
> +		ipc.opt = &icmp_param->replyopts.opt;
> +		if (ipc.opt->opt.srr)
> +			daddr = icmp_param->replyopts.opt.opt.faddr;
>  	}
>  	{
>  		struct flowi fl = { .nl_u = { .ip4_u =
> @@ -516,7 +515,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>  					   IPTOS_PREC_INTERNETCONTROL) :
>  					  iph->tos;
>  
> -	if (ip_options_echo(&icmp_param.replyopts, skb_in))
> +	if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
>  		goto out_unlock;
>  
> 
> @@ -532,15 +531,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>  	icmp_param.offset = skb_network_offset(skb_in);
>  	inet_sk(sk)->tos = tos;
>  	ipc.addr = iph->saddr;
> -	ipc.opt = &icmp_param.replyopts;
> +	ipc.opt = &icmp_param.replyopts.opt;
>  	ipc.shtx.flags = 0;
>  
>  	{
>  		struct flowi fl = {
>  			.nl_u = {
>  				.ip4_u = {
> -					.daddr = icmp_param.replyopts.srr ?
> -						icmp_param.replyopts.faddr :
> +					.daddr = icmp_param.replyopts.opt.opt.srr ?
> +						icmp_param.replyopts.opt.opt.faddr :
>  						iph->saddr,
>  					.saddr = saddr,
>  					.tos = RT_TOS(tos)
> @@ -629,7 +628,7 @@ route_done:
>  	room = dst_mtu(&rt->u.dst);
>  	if (room > 576)
>  		room = 576;
> -	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
> +	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
>  	room -= sizeof(struct icmphdr);
>  
>  	icmp_param.data_len = skb_in->len - icmp_param.offset;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 537731b..a3bf986 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -356,11 +356,11 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
>  {
>  	struct rtable *rt;
>  	const struct inet_request_sock *ireq = inet_rsk(req);
> -	struct ip_options *opt = inet_rsk(req)->opt;
> +	struct ip_options_rcu *opt = inet_rsk(req)->opt;
>  	struct flowi fl = { .oif = sk->sk_bound_dev_if,
>  			    .nl_u = { .ip4_u =
> -				      { .daddr = ((opt && opt->srr) ?
> -						  opt->faddr :
> +				      { .daddr = ((opt && opt->opt.srr) ?
> +						  opt->opt.faddr :
>  						  ireq->rmt_addr),
>  					.saddr = ireq->loc_addr,
>  					.tos = RT_CONN_FLAGS(sk) } },
> @@ -374,7 +374,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
>  	security_req_classify_flow(req, &fl);
>  	if (ip_route_output_flow(net, &rt, &fl, sk, 0))
>  		goto no_route;
> -	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
> +	if (opt && opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway)
>  		goto route_err;
>  	return &rt->u.dst;
>  
> diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
> index 94bf105..8a95972 100644
> --- a/net/ipv4/ip_options.c
> +++ b/net/ipv4/ip_options.c
> @@ -35,7 +35,7 @@
>   * saddr is address of outgoing interface.
>   */
>  
> -void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
> +void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
>  			    __be32 daddr, struct rtable *rt, int is_frag)
>  {
>  	unsigned char *iph = skb_network_header(skb);
> @@ -82,9 +82,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
>   * NOTE: dopt cannot point to skb.
>   */
>  
> -int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
> +int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
>  {
> -	struct ip_options *sopt;
> +	const struct ip_options *sopt;
>  	unsigned char *sptr, *dptr;
>  	int soffset, doffset;
>  	int	optlen;
> @@ -94,10 +94,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
>  
>  	sopt = &(IPCB(skb)->opt);
>  
> -	if (sopt->optlen == 0) {
> -		dopt->optlen = 0;
> +	if (sopt->optlen == 0)
>  		return 0;
> -	}
>  
>  	sptr = skb_network_header(skb);
>  	dptr = dopt->__data;
> @@ -156,7 +154,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
>  		dopt->optlen += optlen;
>  	}
>  	if (sopt->srr) {
> -		unsigned char * start = sptr+sopt->srr;
> +		unsigned char *start = sptr+sopt->srr;
>  		__be32 faddr;
>  
>  		optlen  = start[1];
> @@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt)
>  	}
>  }
>  
> -static struct ip_options *ip_options_get_alloc(const int optlen)
> +static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
>  {
> -	return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3),
> +	return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
>  		       GFP_KERNEL);
>  }
>  
> -static int ip_options_get_finish(struct net *net, struct ip_options **optp,
> -				 struct ip_options *opt, int optlen)
> +static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
> +				 struct ip_options_rcu *opt, int optlen)
>  {
>  	while (optlen & 3)
> -		opt->__data[optlen++] = IPOPT_END;
> -	opt->optlen = optlen;
> -	if (optlen && ip_options_compile(net, opt, NULL)) {
> +		opt->opt.__data[optlen++] = IPOPT_END;
> +	opt->opt.optlen = optlen;
> +	if (optlen && ip_options_compile(net, &opt->opt, NULL)) {
>  		kfree(opt);
>  		return -EINVAL;
>  	}
> @@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp,
>  	return 0;
>  }
>  
> -int ip_options_get_from_user(struct net *net, struct ip_options **optp,
> +int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
>  			     unsigned char __user *data, int optlen)
>  {
> -	struct ip_options *opt = ip_options_get_alloc(optlen);
> +	struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
>  
>  	if (!opt)
>  		return -ENOMEM;
> -	if (optlen && copy_from_user(opt->__data, data, optlen)) {
> +	if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
>  		kfree(opt);
>  		return -EFAULT;
>  	}
>  	return ip_options_get_finish(net, optp, opt, optlen);
>  }
>  
> -int ip_options_get(struct net *net, struct ip_options **optp,
> +int ip_options_get(struct net *net, struct ip_options_rcu **optp,
>  		   unsigned char *data, int optlen)
>  {
> -	struct ip_options *opt = ip_options_get_alloc(optlen);
> +	struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
>  
>  	if (!opt)
>  		return -ENOMEM;
>  	if (optlen)
> -		memcpy(opt->__data, data, optlen);
> +		memcpy(opt->opt.__data, data, optlen);
>  	return ip_options_get_finish(net, optp, opt, optlen);
>  }
>  
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 44b7910..7dde039 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -137,14 +137,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
>   *
>   */
>  int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
> -			  __be32 saddr, __be32 daddr, struct ip_options *opt)
> +			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
>  {
>  	struct inet_sock *inet = inet_sk(sk);
>  	struct rtable *rt = skb_rtable(skb);
>  	struct iphdr *iph;
>  
>  	/* Build the IP header. */
> -	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
> +	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
>  	skb_reset_network_header(skb);
>  	iph = ip_hdr(skb);
>  	iph->version  = 4;
> @@ -160,9 +160,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
>  	iph->protocol = sk->sk_protocol;
>  	ip_select_ident(iph, &rt->u.dst, sk);
>  
> -	if (opt && opt->optlen) {
> -		iph->ihl += opt->optlen>>2;
> -		ip_options_build(skb, opt, daddr, rt, 0);
> +	if (opt && opt->opt.optlen) {
> +		iph->ihl += opt->opt.optlen>>2;
> +		ip_options_build(skb, &opt->opt, daddr, rt, 0);
>  	}
>  
>  	skb->priority = sk->sk_priority;
> @@ -312,9 +312,10 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
>  {
>  	struct sock *sk = skb->sk;
>  	struct inet_sock *inet = inet_sk(sk);
> -	struct ip_options *opt = inet->opt;
> +	struct ip_options_rcu *inet_opt = NULL;
>  	struct rtable *rt;
>  	struct iphdr *iph;
> +	int res;
>  
>  	/* Skip all of this if the packet is already routed,
>  	 * f.e. by something like SCTP.
> @@ -325,13 +326,15 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
>  
>  	/* Make sure we can route this packet. */
>  	rt = (struct rtable *)__sk_dst_check(sk, 0);
> +	rcu_read_lock();
> +	inet_opt = rcu_dereference(inet->inet_opt);
>  	if (rt == NULL) {
>  		__be32 daddr;
>  
>  		/* Use correct destination address if we have options. */
>  		daddr = inet->daddr;
> -		if(opt && opt->srr)
> -			daddr = opt->faddr;
> +		if (inet_opt && inet_opt->opt.srr)
> +			daddr = inet_opt->opt.faddr;
>  
>  		{
>  			struct flowi fl = { .oif = sk->sk_bound_dev_if,
> @@ -359,11 +362,11 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
>  	skb_dst_set(skb, dst_clone(&rt->u.dst));
>  
>  packet_routed:
> -	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
> +	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway)
>  		goto no_route;
>  
>  	/* OK, we know where to send it, allocate and build IP header. */
> -	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
> +	skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
>  	skb_reset_network_header(skb);
>  	iph = ip_hdr(skb);
>  	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
> @@ -377,9 +380,9 @@ packet_routed:
>  	iph->daddr    = rt->rt_dst;
>  	/* Transport layer set skb->h.foo itself. */
>  
> -	if (opt && opt->optlen) {
> -		iph->ihl += opt->optlen >> 2;
> -		ip_options_build(skb, opt, inet->daddr, rt, 0);
> +	if (inet_opt && inet_opt->opt.optlen) {
> +		iph->ihl += inet_opt->opt.optlen >> 2;
> +		ip_options_build(skb, &inet_opt->opt, inet->daddr, rt, 0);
>  	}
>  
>  	ip_select_ident_more(iph, &rt->u.dst, sk,
> @@ -387,10 +390,12 @@ packet_routed:
>  
>  	skb->priority = sk->sk_priority;
>  	skb->mark = sk->sk_mark;
> -
> -	return ip_local_out(skb);
> +	res = ip_local_out(skb);
> +	rcu_read_unlock();
> +	return res;
>  
>  no_route:
> +	rcu_read_unlock();
>  	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
>  	kfree_skb(skb);
>  	return -EHOSTUNREACH;
> @@ -809,7 +814,7 @@ int ip_append_data(struct sock *sk,
>  		/*
>  		 * setup for corking.
>  		 */
> -		opt = ipc->opt;
> +		opt = ipc->opt ? &ipc->opt->opt : NULL;
>  		if (opt) {
>  			if (inet->cork.opt == NULL) {
>  				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
> @@ -1367,26 +1372,23 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
>  		   unsigned int len)
>  {
>  	struct inet_sock *inet = inet_sk(sk);
> -	struct {
> -		struct ip_options	opt;
> -		char			data[40];
> -	} replyopts;
> +	struct ip_options_data replyopts;
>  	struct ipcm_cookie ipc;
>  	__be32 daddr;
>  	struct rtable *rt = skb_rtable(skb);
>  
> -	if (ip_options_echo(&replyopts.opt, skb))
> +	if (ip_options_echo(&replyopts.opt.opt, skb))
>  		return;
>  
>  	daddr = ipc.addr = rt->rt_src;
>  	ipc.opt = NULL;
>  	ipc.shtx.flags = 0;
>  
> -	if (replyopts.opt.optlen) {
> +	if (replyopts.opt.opt.optlen) {
>  		ipc.opt = &replyopts.opt;
>  
> -		if (ipc.opt->srr)
> -			daddr = replyopts.opt.faddr;
> +		if (replyopts.opt.opt.srr)
> +			daddr = replyopts.opt.opt.faddr;
>  	}
>  
>  	{
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index 184a7ad..099e6c3 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -434,6 +434,11 @@ out:
>  }
>  
> 
> +static void opt_kfree_rcu(struct rcu_head *head)
> +{
> +	kfree(container_of(head, struct ip_options_rcu, rcu));
> +}
> +
>  /*
>   *	Socket option code for IP. This is the end of the line after any
>   *	TCP,UDP etc options on an IP socket.
> @@ -479,13 +484,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
>  	switch (optname) {
>  	case IP_OPTIONS:
>  	{
> -		struct ip_options *opt = NULL;
> +		struct ip_options_rcu *old, *opt = NULL;
> +
>  		if (optlen > 40 || optlen < 0)
>  			goto e_inval;
>  		err = ip_options_get_from_user(sock_net(sk), &opt,
>  					       optval, optlen);
>  		if (err)
>  			break;
> +		old = inet->inet_opt;
>  		if (inet->is_icsk) {
>  			struct inet_connection_sock *icsk = inet_csk(sk);
>  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> @@ -494,17 +501,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
>  			       (TCPF_LISTEN | TCPF_CLOSE)) &&
>  			     inet->daddr != LOOPBACK4_IPV6)) {
>  #endif
> -				if (inet->opt)
> -					icsk->icsk_ext_hdr_len -= inet->opt->optlen;
> +				if (old)
> +					icsk->icsk_ext_hdr_len -= old->opt.optlen;
>  				if (opt)
> -					icsk->icsk_ext_hdr_len += opt->optlen;
> +					icsk->icsk_ext_hdr_len += opt->opt.optlen;
>  				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
>  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>  			}
>  #endif
>  		}
> -		opt = xchg(&inet->opt, opt);
> -		kfree(opt);
> +		rcu_assign_pointer(inet->inet_opt, opt);
> +		if (old)
> +			call_rcu(&old->rcu, opt_kfree_rcu);
>  		break;
>  	}
>  	case IP_PKTINFO:
> @@ -1032,12 +1040,15 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
>  	case IP_OPTIONS:
>  	{
>  		unsigned char optbuf[sizeof(struct ip_options)+40];
> -		struct ip_options * opt = (struct ip_options *)optbuf;
> +		struct ip_options *opt = (struct ip_options *)optbuf;
> +		struct ip_options_rcu *inet_opt;
> +
> +		inet_opt = inet->inet_opt;
>  		opt->optlen = 0;
> -		if (inet->opt)
> -			memcpy(optbuf, inet->opt,
> -			       sizeof(struct ip_options)+
> -			       inet->opt->optlen);
> +		if (inet_opt)
> +			memcpy(optbuf, &inet_opt->opt,
> +			       sizeof(struct ip_options) +
> +			       inet_opt->opt.optlen);
>  		release_sock(sk);
>  
>  		if (opt->optlen == 0)
> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> index ab996f9..07ab583 100644
> --- a/net/ipv4/raw.c
> +++ b/net/ipv4/raw.c
> @@ -459,6 +459,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	__be32 saddr;
>  	u8  tos;
>  	int err;
> +	struct ip_options_data opt_copy;
>  
>  	err = -EMSGSIZE;
>  	if (len > 0xFFFF)
> @@ -519,8 +520,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	saddr = ipc.addr;
>  	ipc.addr = daddr;
>  
> -	if (!ipc.opt)
> -		ipc.opt = inet->opt;
> +	if (!ipc.opt) {
> +		struct ip_options_rcu *inet_opt;
> +
> +		rcu_read_lock();
> +		inet_opt = rcu_dereference(inet->inet_opt);
> +		if (inet_opt) {
> +			memcpy(&opt_copy, inet_opt,
> +			       sizeof(*inet_opt) + inet_opt->opt.optlen);
> +			ipc.opt = &opt_copy.opt;
> +		}
> +		rcu_read_unlock();
> +	}
>  
>  	if (ipc.opt) {
>  		err = -EINVAL;
> @@ -529,10 +540,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  		 */
>  		if (inet->hdrincl)
>  			goto done;
> -		if (ipc.opt->srr) {
> +		if (ipc.opt->opt.srr) {
>  			if (!daddr)
>  				goto done;
> -			daddr = ipc.opt->faddr;
> +			daddr = ipc.opt->opt.faddr;
>  		}
>  	}
>  	tos = RT_CONN_FLAGS(sk);
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index a6e0e07..0a94b64 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -309,10 +309,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
>  	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
>  	 */
>  	if (opt && opt->optlen) {
> -		int opt_size = sizeof(struct ip_options) + opt->optlen;
> +		int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
>  
>  		ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
> -		if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
> +		if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
>  			kfree(ireq->opt);
>  			ireq->opt = NULL;
>  		}
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 6a4e832..d746d3b3 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -152,6 +152,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  	__be32 daddr, nexthop;
>  	int tmp;
>  	int err;
> +	struct ip_options_rcu *inet_opt;
>  
>  	if (addr_len < sizeof(struct sockaddr_in))
>  		return -EINVAL;
> @@ -160,10 +161,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  		return -EAFNOSUPPORT;
>  
>  	nexthop = daddr = usin->sin_addr.s_addr;
> -	if (inet->opt && inet->opt->srr) {
> +	inet_opt = inet->inet_opt;
> +	if (inet_opt && inet_opt->opt.srr) {
>  		if (!daddr)
>  			return -EINVAL;
> -		nexthop = inet->opt->faddr;
> +		nexthop = inet_opt->opt.faddr;
>  	}
>  
>  	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
> @@ -181,7 +183,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  		return -ENETUNREACH;
>  	}
>  
> -	if (!inet->opt || !inet->opt->srr)
> +	if (!inet_opt || !inet_opt->opt.srr)
>  		daddr = rt->rt_dst;
>  
>  	if (!inet->saddr)
> @@ -215,8 +217,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  	inet->daddr = daddr;
>  
>  	inet_csk(sk)->icsk_ext_hdr_len = 0;
> -	if (inet->opt)
> -		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
> +	if (inet_opt)
> +		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
>  
>  	tp->rx_opt.mss_clamp = 536;
>  
> @@ -802,17 +804,18 @@ static void syn_flood_warning(struct sk_buff *skb)
>  /*
>   * Save and compile IPv4 options into the request_sock if needed.
>   */
> -static struct ip_options *tcp_v4_save_options(struct sock *sk,
> -					      struct sk_buff *skb)
> +static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
> +						  struct sk_buff *skb)
>  {
> -	struct ip_options *opt = &(IPCB(skb)->opt);
> -	struct ip_options *dopt = NULL;
> +	const struct ip_options *opt = &(IPCB(skb)->opt);
> +	struct ip_options_rcu *dopt = NULL;
>  
>  	if (opt && opt->optlen) {
> -		int opt_size = optlength(opt);
> +		int opt_size = sizeof(*dopt) + opt->optlen;
> +
>  		dopt = kmalloc(opt_size, GFP_ATOMIC);
>  		if (dopt) {
> -			if (ip_options_echo(dopt, skb)) {
> +			if (ip_options_echo(&dopt->opt, skb)) {
>  				kfree(dopt);
>  				dopt = NULL;
>  			}
> @@ -1362,6 +1365,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>  #ifdef CONFIG_TCP_MD5SIG
>  	struct tcp_md5sig_key *key;
>  #endif
> +	struct ip_options_rcu *inet_opt;
>  
>  	if (sk_acceptq_is_full(sk))
>  		goto exit_overflow;
> @@ -1382,13 +1386,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>  	newinet->daddr	      = ireq->rmt_addr;
>  	newinet->rcv_saddr    = ireq->loc_addr;
>  	newinet->saddr	      = ireq->loc_addr;
> -	newinet->opt	      = ireq->opt;
> +	inet_opt	      = ireq->opt;
> +	rcu_assign_pointer(newinet->inet_opt, inet_opt);
>  	ireq->opt	      = NULL;
>  	newinet->mc_index     = inet_iif(skb);
>  	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
>  	inet_csk(newsk)->icsk_ext_hdr_len = 0;
> -	if (newinet->opt)
> -		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
> +	if (inet_opt)
> +		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
>  	newinet->id = newtp->write_seq ^ jiffies;
>  
>  	tcp_mtup_init(newsk);
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 8e28770..af559e0 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -592,6 +592,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	int err, is_udplite = IS_UDPLITE(sk);
>  	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
>  	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
> +	struct ip_options_data opt_copy;
>  
>  	if (len > 0xFFFF)
>  		return -EMSGSIZE;
> @@ -663,22 +664,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  			free = 1;
>  		connected = 0;
>  	}
> -	if (!ipc.opt)
> -		ipc.opt = inet->opt;
> +	if (!ipc.opt) {
> +		struct ip_options_rcu *inet_opt;
> +
> +		rcu_read_lock();
> +		inet_opt = rcu_dereference(inet->inet_opt);
> +		if (inet_opt) {
> +			memcpy(&opt_copy, inet_opt,
> +			       sizeof(*inet_opt) + inet_opt->opt.optlen);
> +			ipc.opt = &opt_copy.opt;
> +		}
> +		rcu_read_unlock();
> +	}
>  
>  	saddr = ipc.addr;
>  	ipc.addr = faddr = daddr;
>  
> -	if (ipc.opt && ipc.opt->srr) {
> +	if (ipc.opt && ipc.opt->opt.srr) {
>  		if (!daddr)
>  			return -EINVAL;
> -		faddr = ipc.opt->faddr;
> +		faddr = ipc.opt->opt.faddr;
>  		connected = 0;
>  	}
>  	tos = RT_TOS(inet->tos);
>  	if (sock_flag(sk, SOCK_LOCALROUTE) ||
>  	    (msg->msg_flags & MSG_DONTROUTE) ||
> -	    (ipc.opt && ipc.opt->is_strictroute)) {
> +	    (ipc.opt && ipc.opt->opt.is_strictroute)) {
>  		tos |= RTO_ONLINK;
>  		connected = 0;
>  	}
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index faae6df..1b25191 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1391,7 +1391,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>  
>  	   First: no IPv4 options.
>  	 */
> -	newinet->opt = NULL;
> +	newinet->inet_opt = NULL;
>  	newnp->ipv6_fl_list = NULL;
>  
>  	/* Clone RX bits */

-- 
Ben Hutchings
Theory and practice are closer in theory than in practice.
                                - John Levine, moderator of comp.compilers

Download attachment "signature.asc" of type "application/pgp-signature" (829 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux - Powered by OpenVZ