lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 15 Apr 2010 22:30:32 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	David Miller <davem@...emloft.net>
Cc:	krkumar2@...ibm.com, netdev@...r.kernel.org,
	nuclearcat@...learcat.com
Subject: Re: NULL pointer dereference panic in stable (2.6.33.2), amd64

Le jeudi 15 avril 2010 à 02:06 -0700, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@...il.com>
> Date: Thu, 15 Apr 2010 10:51:47 +0200
> 
> > In any case, I think there is a fundamental problem with this sk
> > caching. Because one packet can travel in many stacked devices before
> > hitting the wire.
> > 
> > (bonding, vlan, ethernet) for example.
> > 
> > Socket cache is meaningfull for one level only...
> 
> We were talking the other day about that 'tun' change to orphan the
> SKB on TX, and I mentioned the possibility of just doing this in some
> generic location before we give the packet to the device ->xmit()
> method.
> 
> Such a scheme could help with this problem too.

Same thing we did with 

if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
	skb_dst_drop(skb);

in dev_hard_start_xmit() ?

Problem is this skb_tstamp_tx() thing....

One possibility would be to change skb_orphan() to let everything done
by destructor. 

One more argument would let destructor() know if this is the final
destructor() called from skb_release_head_state()

destructor() would be responsible to set skb->destructor and/or skb->sk
to NULL when possible. 


All normal destructors would not care of this 2nd argument and just do
what they actually do, plus setting skb->sk = NULL, skb->destructor =
NULL

Fast path would stay as today, no extra test.

Only tstamp users would need to setup another destructor, a bit more
complex (it would have to take a look at 2nd argument before really
doing the job)


Completely untested patch to get the idea :
(to be completed for the tstamp thing)


 include/linux/skbuff.h         |    8 +++-----
 include/net/sctp/sctp.h        |    2 +-
 include/net/sock.h             |    4 ++--
 net/caif/caif_socket.c         |    4 +++-
 net/core/dev.c                 |   26 +++++++++-----------------
 net/core/skbuff.c              |    2 +-
 net/core/sock.c                |    8 ++++++--
 net/l2tp/l2tp_core.c           |    4 +++-
 net/netfilter/nf_tproxy_core.c |    2 +-
 net/packet/af_packet.c         |    4 ++--
 net/unix/af_unix.c             |    4 ++--
 11 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 38501d2..7dfd833 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -353,7 +353,7 @@ struct sk_buff {
 	kmemcheck_bitfield_end(flags1);
 	__be16			protocol;
 
-	void			(*destructor)(struct sk_buff *skb);
+	void			(*destructor)(struct sk_buff *skb, int final);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 	struct sk_buff		*nfct_reasm;
@@ -1407,15 +1407,13 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
  *	@skb: buffer to orphan
  *
  *	If a buffer currently has an owner then we call the owner's
- *	destructor function and make the @skb unowned. The buffer continues
+ *	destructor function. The buffer continues
  *	to exist but is no longer charged to its former owner.
  */
 static inline void skb_orphan(struct sk_buff *skb)
 {
 	if (skb->destructor)
-		skb->destructor(skb);
-	skb->destructor = NULL;
-	skb->sk		= NULL;
+		skb->destructor(skb, 0);
 }
 
 /**
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 5915155..49e2162 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -130,7 +130,7 @@ int sctp_inet_listen(struct socket *sock, int backlog);
 void sctp_write_space(struct sock *sk);
 unsigned int sctp_poll(struct file *file, struct socket *sock,
 		poll_table *wait);
-void sctp_sock_rfree(struct sk_buff *skb);
+void sctp_sock_rfree(struct sk_buff *skb, int final);
 void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		    struct sctp_association *asoc);
 extern struct percpu_counter sctp_sockets_allocated;
diff --git a/include/net/sock.h b/include/net/sock.h
index 56df440..a042c9d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -988,8 +988,8 @@ extern struct sk_buff		*sock_wmalloc(struct sock *sk,
 extern struct sk_buff		*sock_rmalloc(struct sock *sk,
 					      unsigned long size, int force,
 					      gfp_t priority);
-extern void			sock_wfree(struct sk_buff *skb);
-extern void			sock_rfree(struct sk_buff *skb);
+extern void			sock_wfree(struct sk_buff *skb, int final);
+extern void			sock_rfree(struct sk_buff *skb, int final);
 
 extern int			sock_setsockopt(struct socket *sock, int level,
 						int op, char __user *optval,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cdf62b9..4e7276a 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -256,10 +256,12 @@ static void caif_sktflowctrl_cb(struct cflayer *layr,
 	}
 }
 
-static void skb_destructor(struct sk_buff *skb)
+static void skb_destructor(struct sk_buff *skb, int final)
 {
 	dbfs_atomic_inc(&cnt.skb_free);
 	dbfs_atomic_dec(&cnt.skb_in_use);
+	skb->sk = NULL;
+	skb->destructor = NULL;
 }
 
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 876b111..9bffbe5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1828,12 +1828,12 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 }
 
 struct dev_gso_cb {
-	void (*destructor)(struct sk_buff *skb);
+	void (*destructor)(struct sk_buff *skb, int final);
 };
 
 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
 
-static void dev_gso_skb_destructor(struct sk_buff *skb)
+static void dev_gso_skb_destructor(struct sk_buff *skb, int final)
 {
 	struct dev_gso_cb *cb;
 
@@ -1847,7 +1847,9 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 
 	cb = DEV_GSO_CB(skb);
 	if (cb->destructor)
-		cb->destructor(skb);
+		cb->destructor(skb, final);
+	skb->sk = NULL;
+	skb->destructor = NULL;
 }
 
 /**
@@ -1904,23 +1906,11 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		skb_orphan(skb);
+
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
-		/*
-		 * TODO: if skb_orphan() was called by
-		 * dev->hard_start_xmit() (for example, the unmodified
-		 * igb driver does that; bnx2 doesn't), then
-		 * skb_tx_software_timestamp() will be unable to send
-		 * back the time stamp.
-		 *
-		 * How can this be prevented? Always create another
-		 * reference to the socket before calling
-		 * dev->hard_start_xmit()? Prevent that skb_orphan()
-		 * does anything in dev->hard_start_xmit() by clearing
-		 * the skb destructor before the call and restoring it
-		 * afterwards, then doing the skb_orphan() ourselves?
-		 */
 		return rc;
 	}
 
@@ -1938,6 +1928,8 @@ gso:
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(nskb);
 
+		skb_orphan(nskb);
+
 		rc = ops->ndo_start_xmit(nskb, dev);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bdea0ef..90c171f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -396,7 +396,7 @@ static void skb_release_head_state(struct sk_buff *skb)
 #endif
 	if (skb->destructor) {
 		WARN_ON(in_irq());
-		skb->destructor(skb);
+		skb->destructor(skb, 1);
 	}
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
diff --git a/net/core/sock.c b/net/core/sock.c
index 7effa1e..fcf67ea 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1259,7 +1259,7 @@ void __init sk_init(void)
 /*
  * Write buffer destructor automatically called from kfree_skb.
  */
-void sock_wfree(struct sk_buff *skb)
+void sock_wfree(struct sk_buff *skb, int final)
 {
 	struct sock *sk = skb->sk;
 	unsigned int len = skb->truesize;
@@ -1279,18 +1279,22 @@ void sock_wfree(struct sk_buff *skb)
 	 */
 	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
 		__sk_free(sk);
+	skb->sk = NULL;
+	skb->destructor = NULL;
 }
 EXPORT_SYMBOL(sock_wfree);
 
 /*
  * Read buffer destructor automatically called from kfree_skb.
  */
-void sock_rfree(struct sk_buff *skb)
+void sock_rfree(struct sk_buff *skb, int final)
 {
 	struct sock *sk = skb->sk;
 
 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 	sk_mem_uncharge(skb->sk, skb->truesize);
+	skb->sk = NULL;
+	skb->destructor = NULL;
 }
 EXPORT_SYMBOL(sock_rfree);
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 98dfcce..a3b0a95 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -973,9 +973,11 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_core);
 
 /* Automatically called when the skb is freed.
  */
-static void l2tp_sock_wfree(struct sk_buff *skb)
+static void l2tp_sock_wfree(struct sk_buff *skb, int final)
 {
 	sock_put(skb->sk);
+	skb->sk = NULL;
+	skb->destructor = NULL;
 }
 
 /* For data skbs that we transmit, we associate with the tunnel socket
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index 5490fc3..17dd2d9 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -55,7 +55,7 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
 EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
 
 static void
-nf_tproxy_destructor(struct sk_buff *skb)
+nf_tproxy_destructor(struct sk_buff *skb, int final)
 {
 	struct sock *sk = skb->sk;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f162d59..dc8e843 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -808,7 +808,7 @@ ring_is_full:
 	goto drop_n_restore;
 }
 
-static void tpacket_destruct_skb(struct sk_buff *skb)
+static void tpacket_destruct_skb(struct sk_buff *skb, int final)
 {
 	struct packet_sock *po = pkt_sk(skb->sk);
 	void *ph;
@@ -823,7 +823,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
 	}
 
-	sock_wfree(skb);
+	sock_wfree(skb, final);
 }
 
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3d9122e..17fca55 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1303,7 +1303,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+static void unix_destruct_fds(struct sk_buff *skb, int final)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
@@ -1312,7 +1312,7 @@ static void unix_destruct_fds(struct sk_buff *skb)
 	/* Alas, it calls VFS */
 	/* So fscking what? fput() had been SMP-safe since the last Summer */
 	scm_destroy(&scm);
-	sock_wfree(skb);
+	sock_wfree(skb, final);
 }
 
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists