lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 7 Jul 2009 12:33:25 -0600
From:	"Tantilov, Emil S" <emil.s.tantilov@...el.com>
To:	Eric Dumazet <eric.dumazet@...il.com>,
	"David S. Miller" <davem@...emloft.net>
CC:	Emil S Tantilov <emils.tantilov@...il.com>,
	NetDev <netdev@...r.kernel.org>,
	"Brandeburg, Jesse" <jesse.brandeburg@...el.com>,
	"Kirsher, Jeffrey T" <jeffrey.t.kirsher@...el.com>,
	Jiri Olsa <jolsa@...hat.com>
Subject: RE: [PATCH] net: sk_alloc() should not blindly overwrite memory

Eric Dumazet wrote:
> Eric Dumazet a écrit :
>> 
>> 
>> Hold on, I found the problem and will submit a patch (after testing
>> it) in following hours 
>> 
> 
> David, while looking at Emil case, I found following problem.
> 
> I am not sure this bug is responsible for the BUG hit by Emil,
> but it seems a candidate for stable as well...
> 
> Thanks
> 
> [PATCH] net: sk_alloc() should not blindly overwrite memory
> 
> Some sockets use SLAB_DESTROY_BY_RCU, and our RCU code rely that some
> fields should not be blindly overwritten, even with null.
> 
> These fields are sk->sk_refcnt and sk->sk_nulls_node.next
> 
> Current sk_alloc() implementation doesnt respect this hypothesis,
> calling kmem_alloc with __GFP_ZERO and setting sk_refcnt to 1 instead
> of atomically increment it.
> 
> Reported-by: Emil S Tantilov <emils.tantilov@...il.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
> ---
>  include/net/sock.h            |   24 +++++++++++-------
>  net/core/sock.c               |   41 ++++++++++++++++++++++++++++----
>  net/ipv4/inet_timewait_sock.c |    2 -
>  3 files changed, 52 insertions(+), 15 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 352f06b..3f6284e 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -103,15 +103,15 @@ struct net;
> 
>  /**
>   *	struct sock_common - minimal network layer representation of
> sockets + *	@skc_node: main hash linkage for various protocol lookup
> tables + *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite
> protocol + *	@skc_refcnt: reference count
> + *	@skc_hash: hash value used with various protocol lookup tables
>   *	@skc_family: network address family
>   *	@skc_state: Connection state
>   *	@skc_reuse: %SO_REUSEADDR setting
>   *	@skc_bound_dev_if: bound device index if != 0
> - *	@skc_node: main hash linkage for various protocol lookup tables
> - *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
>   *	@skc_bind_node: bind hash linkage for various protocol lookup
> tables 
> - *	@skc_refcnt: reference count
> - *	@skc_hash: hash value used with various protocol lookup tables
>   *	@skc_prot: protocol handlers inside a network family
>   *	@skc_net: reference to the network namespace of this socket
>   *
> @@ -119,17 +119,23 @@ struct net;
>   *	for struct sock and struct inet_timewait_sock.
>   */
>  struct sock_common {
> -	unsigned short		skc_family;
> -	volatile unsigned char	skc_state;
> -	unsigned char		skc_reuse;
> -	int			skc_bound_dev_if;
> +	/*
> +	 * WARNING : skc_node, skc_refcnt must be first elements of this
> struct +	 * (sk_prot_alloc() should not clear skc_node.next &
> skc_refcnt because +	 *  of RCU lookups)
> +	 */
>  	union {
>  		struct hlist_node	skc_node;
>  		struct hlist_nulls_node skc_nulls_node;
>  	};
> -	struct hlist_node	skc_bind_node;
>  	atomic_t		skc_refcnt;
> +
>  	unsigned int		skc_hash;
> +	unsigned short		skc_family;
> +	volatile unsigned char	skc_state;
> +	unsigned char		skc_reuse;
> +	int			skc_bound_dev_if;
> +	struct hlist_node	skc_bind_node;
>  	struct proto		*skc_prot;
>  #ifdef CONFIG_NET_NS
>  	struct net	 	*skc_net;
> diff --git a/net/core/sock.c b/net/core/sock.c
> index b0ba569..daadcc5 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -994,8 +994,25 @@ struct sock *sk_alloc(struct net *net, int
>  family, gfp_t priority, {
>  	struct sock *sk;
> 
> -	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
> +	sk = sk_prot_alloc(prot, priority, family);
>  	if (sk) {
> +		/*
> +		 * caches using SLAB_DESTROY_BY_RCU should let sk_node
> +		 * and sk_refcnt untouched.
> +		 *
> +		 * Following makes sense only if sk first fields are
> +		 * in following order : sk_node, refcnt, sk_hash
> +		 */
> +		BUILD_BUG_ON(offsetof(struct sock, sk_node) != 0);
> +		BUILD_BUG_ON(offsetof(struct sock, sk_refcnt) !=
> +			     sizeof(sk->sk_node));
> +		BUILD_BUG_ON(offsetof(struct sock, sk_hash) !=
> +			     sizeof(sk->sk_node) + sizeof(sk->sk_refcnt));
> +		if (prot->slab_flags & SLAB_DESTROY_BY_RCU)
> +			memset(&sk->sk_hash, 0,
> +			       prot->obj_size - offsetof(struct sock, sk_hash));
> +		else
> +			memset(sk, 0, prot->obj_size);
>  		sk->sk_family = family;
>  		/*
>  		 * See comment in struct sock definition to understand
> @@ -1125,7 +1142,7 @@ struct sock *sk_clone(const struct sock *sk,
> const gfp_t priority) 
> 
>  		newsk->sk_err	   = 0;
>  		newsk->sk_priority = 0;
> -		atomic_set(&newsk->sk_refcnt, 2);
> +		atomic_add(2, &newsk->sk_refcnt);
> 
>  		/*
>  		 * Increment the counter in the same struct proto as the master
> @@ -1840,7 +1857,7 @@ void sock_init_data(struct socket *sock, struct
> sock *sk) 
> 
>  	sk->sk_stamp = ktime_set(-1L, 0);
> 
> -	atomic_set(&sk->sk_refcnt, 1);
> +	atomic_inc(&sk->sk_refcnt);
>  	atomic_set(&sk->sk_wmem_alloc, 1);
>  	atomic_set(&sk->sk_drops, 0);
>  }
> @@ -2140,12 +2157,25 @@ static inline void release_proto_idx(struct
>  proto *prot) }
>  #endif
> 
> +/*
> + * We need to initialize skc->skc_refcnt to 0, and
> skc->skc_node.pprev to NULL + * but only on newly allocated objects
> (check sk_prot_alloc()) + */
> +static void sk_init_once(void *arg)
> +{
> +	struct sock_common *skc = arg;
> +
> +	atomic_set(&skc->skc_refcnt, 0);
> +	skc->skc_node.pprev = NULL;
> +}
> +
>  int proto_register(struct proto *prot, int alloc_slab)
>  {
>  	if (alloc_slab) {
>  		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
>  					SLAB_HWCACHE_ALIGN | prot->slab_flags,
> -					NULL);
> +					prot->slab_flags & SLAB_DESTROY_BY_RCU ?
> +					    sk_init_once : NULL);
> 
>  		if (prot->slab == NULL) {
>  			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
> @@ -2187,7 +2217,8 @@ int proto_register(struct proto *prot, int
>  						  alloc_slab) 0,
>  						  SLAB_HWCACHE_ALIGN |
>  							prot->slab_flags,
> -						  NULL);
> +						  prot->slab_flags & SLAB_DESTROY_BY_RCU ?
> +							sk_init_once : NULL);
>  			if (prot->twsk_prot->twsk_slab == NULL)
>  				goto out_free_timewait_sock_slab_name;
>  		}
> diff --git a/net/ipv4/inet_timewait_sock.c
> b/net/ipv4/inet_timewait_sock.c 
> index 61283f9..a2997df 100644
> --- a/net/ipv4/inet_timewait_sock.c
> +++ b/net/ipv4/inet_timewait_sock.c
> @@ -139,7 +139,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const
>  		struct sock *sk, const int stat tw->tw_transparent  =
>  		inet->transparent; tw->tw_prot	    = sk->sk_prot_creator;
>  		twsk_net_set(tw, hold_net(sock_net(sk)));
> -		atomic_set(&tw->tw_refcnt, 1);
> +		atomic_inc(&tw->tw_refcnt);
>  		inet_twsk_dead_node_init(tw);
>  		__module_get(tw->tw_prot->owner);
>  	}

Eric,

With this patch applied, I get panic on boot:

[    5.334653] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
[    5.344225] IP: [<ffffffff811ed364>] selinux_socket_post_create+0x78/0x95
[    5.352263] PGD 0 
[    5.354952] Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
[    5.360599] last sysfs file: 
[    5.364292] CPU 0 
[    5.366985] Modules linked in:
[    5.370836] Pid: 1, comm: swapper Not tainted 2.6.31-rc1-net-2.6-igb-ed-patch-07071123 #2 S5520HC
[    5.381465] RIP: 0010:[<ffffffff811ed364>]  [<ffffffff811ed364>] selinux_socket_post_create+0x78/0x95
[    5.392551] RSP: 0018:ffff8801ef0a5d60  EFLAGS: 00010286
[    5.398871] RAX: 0000000000000001 RBX: ffff88036e5ace80 RCX: 0000000000000006
[    5.407227] RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000002
[    5.415587] RBP: ffff8801ef0a5d80 R08: 0000000000000001 R09: 0000000000000000
[    5.423945] R10: ffff88036e000000 R11: ffff8801ef0a5c20 R12: ffff88036ec09c80
[    5.432300] R13: 0000000000000002 R14: 0000000000000002 R15: 0000000000000003
[    5.440662] FS:  0000000000000000(0000) GS:ffffc90000000000(0000) knlGS:0000000000000000
[    5.450386] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
[    5.457189] CR2: 0000000000000010 CR3: 0000000001001000 CR4: 00000000000006b0
[    5.465545] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    5.473908] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[    5.482267] Process swapper (pid: 1, threadinfo ffff8801ef0a4000, task ffff88036f040000)
[    5.491994] Stack:
[    5.494606]  01ffffff816d0040 00000000ffffff9f ffffffff817244a0 ffff88036ec09c80
[    5.503262] <0> ffff8801ef0a5d90 ffffffff811e5ef0 ffff8801ef0a5df0 ffffffff8138c041
[    5.512853] <0> ffffffff8138bf93 00000001ef0a5de0 ffff8801ef0a5e18 0000000681478fb3
[    5.523046] Call Trace:
[    5.526158]  [<ffffffff811e5ef0>] security_socket_post_create+0x11/0x13
[    5.533932]  [<ffffffff8138c041>] __sock_create+0x1a4/0x1ff
[    5.540541]  [<ffffffff8138bf93>] ? __sock_create+0xf6/0x1ff
[    5.547243]  [<ffffffff8184028c>] ? inet_init+0x0/0x204
[    5.553463]  [<ffffffff8138c0bb>] sock_create_kern+0x1f/0x21
[    5.560162]  [<ffffffff813edd5a>] inet_ctl_sock_create+0x29/0x5d
[    5.567255]  [<ffffffff8184028c>] ? inet_init+0x0/0x204
[    5.573473]  [<ffffffff8183fb60>] tcp_sk_init+0x25/0x27
[    5.579687]  [<ffffffff81396716>] register_pernet_operations+0x18/0x1a
[    5.587365]  [<ffffffff81396824>] register_pernet_subsys+0x29/0x3d
[    5.594657]  [<ffffffff8184028c>] ? inet_init+0x0/0x204
[    5.600882]  [<ffffffff8183fb27>] tcp_v4_init+0x1c/0x30
[    5.607106]  [<ffffffff818403cd>] inet_init+0x141/0x204
[    5.613333]  [<ffffffff8100905c>] do_one_initcall+0x56/0x130
[    5.620038]  [<ffffffff810a2a72>] ? register_irq_proc+0xae/0xca
[    5.627041]  [<ffffffff81130000>] ? d_add+0x16/0x1d
[    5.632875]  [<ffffffff81811915>] kernel_init+0x15e/0x1b4
[    5.639294]  [<ffffffff81028b9a>] child_rip+0xa/0x20
[    5.645214]  [<ffffffff8102857c>] ? restore_args+0x0/0x30
[    5.651622]  [<ffffffff818117b7>] ? kernel_init+0x0/0x1b4
[    5.658033]  [<ffffffff81028b90>] ? child_rip+0x0/0x20
[    5.664158] Code: ca 44 89 ef e8 d8 b6 ff ff c6 43 22 01 66 89 43 20 31 c0 49 8b 54 24 60 48 85 d2 74 22 8b 43 1c 48 8b 92 a8 03 00 00 41 0f b7 f5 <89> 42 10 8b 43 20 66 89 42 18 49 8b 7c 24 60 e8 de 4a 00 00 41 
[    5.690779] RIP  [<ffffffff811ed364>] selinux_socket_post_create+0x78/0x95
[    5.698921]  RSP <ffff8801ef0a5d60>
[    5.703202] CR2: 0000000000000010
[    5.707298] ---[ end trace a7919e7f17c0a725 ]---
[    5.712848] swapper used greatest stack depth: 4680 bytes left
[    5.727678] Kernel panic - not syncing: Attempted to kill init!
[    5.734672] Pid: 1, comm: swapper Tainted: G      D    2.6.31-rc1-net-2.6-igb-ed-patch-07071123 #2
[    5.745375] Call Trace:
[    5.748485]  [<ffffffff81057cfd>] panic+0xdb/0x190
[    5.754217]  [<ffffffff8105ae3b>] ? do_exit+0x35f/0x6d8
[    5.760439]  [<ffffffff8105adeb>] ? do_exit+0x30f/0x6d8
[    5.766659]  [<ffffffff8105adeb>] ? do_exit+0x30f/0x6d8
[    5.772875]  [<ffffffff8107bdbb>] ? trace_hardirqs_on+0xd/0xf
[    5.779688]  [<ffffffff8147a295>] ? _write_unlock_irq+0x2b/0x31
[    5.786690]  [<ffffffff8105ab55>] do_exit+0x79/0x6d8
[    5.792622]  [<ffffffff8147b07d>] oops_end+0xb2/0xba
[    5.798555]  [<ffffffff810407b4>] no_context+0x1ef/0x1fe
[    5.804875]  [<ffffffff8107ba3b>] ? mark_lock+0x22/0x1fb
[    5.811196]  [<ffffffff8107af82>] ? save_trace+0x3f/0x96
[    5.817516]  [<ffffffff81040a03>] __bad_area_nosemaphore+0x186/0x1a9
[    5.825009]  [<ffffffff813e5bbf>] ? raw_hash_sk+0x2a/0x73
[    5.831421]  [<ffffffff8147c354>] ? do_page_fault+0xbd/0x22e
[    5.838128]  [<ffffffff81040a9c>] bad_area_nosemaphore+0xe/0x10
[    5.845132]  [<ffffffff8147c3b6>] do_page_fault+0x11f/0x22e
[    5.851747]  [<ffffffff8147a4ef>] page_fault+0x1f/0x30
[    5.857871]  [<ffffffff811ed364>] ? selinux_socket_post_create+0x78/0x95
[    5.865754]  [<ffffffff811e5ef0>] security_socket_post_create+0x11/0x13
[    5.873536]  [<ffffffff8138c041>] __sock_create+0x1a4/0x1ff
[    5.880149]  [<ffffffff8138bf93>] ? __sock_create+0xf6/0x1ff
[    5.886859]  [<ffffffff8184028c>] ? inet_init+0x0/0x204
[    5.893072]  [<ffffffff8138c0bb>] sock_create_kern+0x1f/0x21
[    5.899774]  [<ffffffff813edd5a>] inet_ctl_sock_create+0x29/0x5d
[    5.906863]  [<ffffffff8184028c>] ? inet_init+0x0/0x204
[    5.913080]  [<ffffffff8183fb60>] tcp_sk_init+0x25/0x27
[    5.919299]  [<ffffffff81396716>] register_pernet_operations+0x18/0x1a
[    5.926986]  [<ffffffff81396824>] register_pernet_subsys+0x29/0x3d
[    5.934274]  [<ffffffff8184028c>] ? inet_init+0x0/0x204
[    5.940504]  [<ffffffff8183fb27>] tcp_v4_init+0x1c/0x30
[    5.946730]  [<ffffffff818403cd>] inet_init+0x141/0x204
[    5.952957]  [<ffffffff8100905c>] do_one_initcall+0x56/0x130
[    5.959668]  [<ffffffff810a2a72>] ? register_irq_proc+0xae/0xca
[    5.966666]  [<ffffffff81130000>] ? d_add+0x16/0x1d
[    5.972495]  [<ffffffff81811915>] kernel_init+0x15e/0x1b4
[    5.978909]  [<ffffffff81028b9a>] child_rip+0xa/0x20
[    5.984845]  [<ffffffff8102857c>] ? restore_args+0x0/0x30
[    5.991266]  [<ffffffff818117b7>] ? kernel_init+0x0/0x1b4
[    5.997686]  [<ffffffff81028b90>] ? child_rip+0x0/0x20

Emil

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ