[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1285236939-3239-1-git-send-email-xiaosuo@gmail.com>
Date: Thu, 23 Sep 2010 18:15:39 +0800
From: Changli Gao <xiaosuo@...il.com>
To: "David S. Miller" <davem@...emloft.net>
Cc: Eric Dumazet <eric.dumazet@...il.com>,
Oliver Hartkopp <socketcan@...tkopp.net>,
"Michael S. Tsirkin" <mst@...hat.com>, netdev@...r.kernel.org,
Changli Gao <xiaosuo@...il.com>
Subject: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
Since skb->destructor() is used to account socket memory, and maybe called
before the skb is sent out, a corrupt skb maybe sent out finally.
A new destructor is added into structure skb_shared_info(), and it won't
be called until the last reference to the data of an skb is put. af_packet
uses this destructor instead.
Signed-off-by: Changli Gao <xiaosuo@...il.com>
---
v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
fix splice the skbs generated by AF_PACKET socket to the pipe.
v2: avoid kmalloc/kfree
include/linux/skbuff.h | 7 ++++---
net/core/skbuff.c | 29 ++++++++++++++++++++---------
net/packet/af_packet.c | 25 ++++++++++++-------------
3 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9e8085a..0854135 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -191,15 +191,16 @@ struct skb_shared_info {
__u8 tx_flags;
struct sk_buff *frag_list;
struct skb_shared_hwtstamps hwtstamps;
+ void (*data_destructor)(struct sk_buff *skb);
/*
* Warning : all fields before dataref are cleared in __alloc_skb()
*/
atomic_t dataref;
- /* Intermediate layers must ensure that destructor_arg
- * remains valid until skb destructor */
- void * destructor_arg;
+ /* Intermediate layers must ensure that data_destructor_arg
+ * remains valid until skb data destructor */
+ void *data_destructor_arg[2];
/* must be last field, see pskb_expand_head() */
skb_frag_t frags[MAX_SKB_FRAGS];
};
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 752c197..95a48fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -332,10 +332,14 @@ static void skb_release_data(struct sk_buff *skb)
if (!skb->cloned ||
!atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
&skb_shinfo(skb)->dataref)) {
- if (skb_shinfo(skb)->nr_frags) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ if (shinfo->data_destructor)
+ shinfo->data_destructor(skb);
+ if (shinfo->nr_frags) {
int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ for (i = 0; i < shinfo->nr_frags; i++)
+ put_page(shinfo->frags[i].page);
}
if (skb_has_frag_list(skb))
@@ -497,9 +501,12 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
if (skb_shared(skb) || skb_cloned(skb))
return false;
+ shinfo = skb_shinfo(skb);
+ if (shinfo->data_destructor)
+ return false;
+
skb_release_head_state(skb);
- shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
@@ -799,7 +806,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb),
- offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+ offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ skb_shinfo(skb)->data_destructor = NULL;
/* Check if we can avoid taking references on fragments if we own
* the last reference on skb->head. (see skb_release_data())
@@ -1408,7 +1417,7 @@ new_page:
static inline int spd_fill_page(struct splice_pipe_desc *spd,
struct pipe_inode_info *pipe, struct page *page,
unsigned int *len, unsigned int offset,
- struct sk_buff *skb, int linear,
+ struct sk_buff *skb, bool linear,
struct sock *sk)
{
if (unlikely(spd->nr_pages == pipe->buffers))
@@ -1446,7 +1455,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
static inline int __splice_segment(struct page *page, unsigned int poff,
unsigned int plen, unsigned int *off,
unsigned int *len, struct sk_buff *skb,
- struct splice_pipe_desc *spd, int linear,
+ struct splice_pipe_desc *spd, bool linear,
struct sock *sk,
struct pipe_inode_info *pipe)
{
@@ -1498,7 +1507,7 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
if (__splice_segment(virt_to_page(skb->data),
(unsigned long) skb->data & (PAGE_SIZE - 1),
skb_headlen(skb),
- offset, len, skb, spd, 1, sk, pipe))
+ offset, len, skb, spd, true, sk, pipe))
return 1;
/*
@@ -1508,7 +1517,9 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
if (__splice_segment(f->page, f->page_offset, f->size,
- offset, len, skb, spd, 0, sk, pipe))
+ offset, len, skb, spd,
+ skb_shinfo(skb)->data_destructor != NULL,
+ sk, pipe))
return 1;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27..ecf57c7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -825,19 +825,19 @@ ring_is_full:
static void tpacket_destruct_skb(struct sk_buff *skb)
{
- struct packet_sock *po = pkt_sk(skb->sk);
- void *ph;
-
- BUG_ON(skb == NULL);
+ struct packet_sock *po;
+ po = pkt_sk(skb_shinfo(skb)->data_destructor_arg[0]);
if (likely(po->tx_ring.pg_vec)) {
- ph = skb_shinfo(skb)->destructor_arg;
+ void *ph = skb_shinfo(skb)->data_destructor_arg[1];
+
BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
atomic_dec(&po->tx_ring.pending);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
}
+ skb->sk = &po->sk;
sock_wfree(skb);
}
@@ -862,7 +862,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->dev = dev;
skb->priority = po->sk.sk_priority;
skb->mark = po->sk.sk_mark;
- skb_shinfo(skb)->destructor_arg = ph.raw;
switch (po->tp_version) {
case TPACKET_V2:
@@ -884,9 +883,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
to_write = tp_len;
if (sock->type == SOCK_DGRAM) {
- err = dev_hard_header(skb, dev, ntohs(proto), addr,
- NULL, tp_len);
- if (unlikely(err < 0))
+ if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
+ NULL, tp_len) < 0))
return -EINVAL;
} else if (dev->hard_header_len) {
/* net device doesn't like empty head */
@@ -897,8 +895,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
}
skb_push(skb, dev->hard_header_len);
- err = skb_store_bits(skb, 0, data,
- dev->hard_header_len);
+ err = skb_store_bits(skb, 0, data, dev->hard_header_len);
if (unlikely(err))
return err;
@@ -906,7 +903,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
to_write -= dev->hard_header_len;
}
- err = -EFAULT;
page = virt_to_page(data);
offset = offset_in_page(data);
len_max = PAGE_SIZE - offset;
@@ -1028,7 +1024,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
}
- skb->destructor = tpacket_destruct_skb;
+ skb_shinfo(skb)->data_destructor_arg[0] = &po->sk;
+ skb_shinfo(skb)->data_destructor_arg[1] = ph;
+ skb->destructor = NULL;
+ skb_shinfo(skb)->data_destructor = tpacket_destruct_skb;
__packet_set_status(po, ph, TP_STATUS_SENDING);
atomic_inc(&po->tx_ring.pending);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists