lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Wed, 08 Jul 2009 00:33:29 +0200 From: Eric Dumazet <eric.dumazet@...il.com> To: "Tantilov, Emil S" <emil.s.tantilov@...el.com> CC: "David S. Miller" <davem@...emloft.net>, Emil S Tantilov <emils.tantilov@...il.com>, NetDev <netdev@...r.kernel.org>, "Brandeburg, Jesse" <jesse.brandeburg@...el.com>, "Kirsher, Jeffrey T" <jeffrey.t.kirsher@...el.com>, Jiri Olsa <jolsa@...hat.com> Subject: [PATCH] net: sk_prot_alloc() should not blindly overwrite memory Tantilov, Emil S a écrit : > Eric Dumazet wrote: >> Eric Dumazet a écrit : >>> >>> Hold on, I found the problem and will submit a patch (after testing >>> it) in following hours >>> >> David, while looking at Emil case, I found following problem. >> >> I am not sure this bug is responsible for the BUG hit by Emil, >> but it seems a candidate for stable as well... >> >> Thanks >> >> [PATCH] net: sk_alloc() should not blindly overwrite memory >> >> Some sockets use SLAB_DESTROY_BY_RCU, and our RCU code rely that some >> fields should not be blindly overwritten, even with null. >> >> These fields are sk->sk_refcnt and sk->sk_nulls_node.next >> >> Current sk_alloc() implementation doesnt respect this hypothesis, >> calling kmem_alloc with __GFP_ZERO and setting sk_refcnt to 1 instead >> of atomically increment it. >> >> Reported-by: Emil S Tantilov <emils.tantilov@...il.com> >> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com> > > Eric, > > With this patch applied, I get panic on boot: Oops, this stuff has to be done inside sk_prot_alloc() or breaks selinux Thanks for testing Emil, here is a second try ! [PATCH] net: sk_prot_alloc() should not blindly overwrite memory Some sockets use SLAB_DESTROY_BY_RCU, and our RCU code rely that some fields should not be blindly overwritten, even with null. These fields are sk->sk_refcnt and sk->sk_nulls_node.next Current sk_prot_alloc() implementation doesnt respect this hypothesis, calling kmem_cache_alloc() with __GFP_ZERO and setting sk_refcnt to 1 instead of atomically increment it. Reported-by: Emil S Tantilov <emils.tantilov@...il.com> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com> --- include/net/sock.h | 24 +++++++++------ net/core/sock.c | 49 ++++++++++++++++++++++++++++---- net/ipv4/inet_timewait_sock.c | 2 - 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 352f06b..3f6284e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -103,15 +103,15 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets + * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol + * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 - * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables - * @skc_refcnt: reference count - * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @@ -119,17 +119,23 @@ struct net; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - unsigned short skc_family; - volatile unsigned char skc_state; - unsigned char skc_reuse; - int skc_bound_dev_if; + /* + * WARNING : skc_node, skc_refcnt must be first elements of this struct + * (sk_prot_alloc() should not clear skc_node.next & skc_refcnt because + * of RCU lookups) + */ union { struct hlist_node skc_node; struct hlist_nulls_node skc_nulls_node; }; - struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; + unsigned short skc_family; + volatile unsigned char skc_state; + unsigned char skc_reuse; + int skc_bound_dev_if; + struct hlist_node skc_bind_node; struct proto *skc_prot; #ifdef CONFIG_NET_NS struct net *skc_net; diff --git a/net/core/sock.c b/net/core/sock.c index b0ba569..cf43ff1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -939,8 +939,31 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, struct kmem_cache *slab; slab = prot->slab; - if (slab != NULL) - sk = kmem_cache_alloc(slab, priority); + if (slab != NULL) { + /* + * caches using SLAB_DESTROY_BY_RCU should let sk_node + * and sk_refcnt untouched. + * + * Following makes sense only if sk first fields are + * in following order : sk_node, refcnt, sk_hash + */ + BUILD_BUG_ON(offsetof(struct sock, sk_node) != 0); + BUILD_BUG_ON(offsetof(struct sock, sk_refcnt) != + sizeof(sk->sk_node)); + BUILD_BUG_ON(offsetof(struct sock, sk_hash) != + sizeof(sk->sk_node) + sizeof(sk->sk_refcnt)); + sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); + if (!sk) + return sk; + if (priority & __GFP_ZERO) { + if (prot->slab_flags & SLAB_DESTROY_BY_RCU) + memset(&sk->sk_hash, 0, + prot->obj_size - + offsetof(struct sock, sk_hash)); + else + memset(sk, 0, prot->obj_size); + } + } else sk = kmalloc(prot->obj_size, priority); @@ -1125,7 +1148,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; - atomic_set(&newsk->sk_refcnt, 2); + atomic_add(2, &newsk->sk_refcnt); /* * Increment the counter in the same struct proto as the master @@ -1840,7 +1863,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); - atomic_set(&sk->sk_refcnt, 1); + atomic_inc(&sk->sk_refcnt); atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } @@ -2140,12 +2163,25 @@ static inline void release_proto_idx(struct proto *prot) } #endif +/* + * We need to initialize skc->skc_refcnt to 0, and skc->skc_node.pprev to NULL + * but only on newly allocated objects (check sk_prot_alloc()) + */ +static void sk_init_once(void *arg) +{ + struct sock_common *skc = arg; + + atomic_set(&skc->skc_refcnt, 0); + skc->skc_node.pprev = NULL; +} + int proto_register(struct proto *prot, int alloc_slab) { if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN | prot->slab_flags, - NULL); + prot->slab_flags & SLAB_DESTROY_BY_RCU ? + sk_init_once : NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", @@ -2187,7 +2223,8 @@ int proto_register(struct proto *prot, int alloc_slab) 0, SLAB_HWCACHE_ALIGN | prot->slab_flags, - NULL); + prot->slab_flags & SLAB_DESTROY_BY_RCU ? + sk_init_once : NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 61283f9..a2997df 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -139,7 +139,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); - atomic_set(&tw->tw_refcnt, 1); + atomic_inc(&tw->tw_refcnt); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists