[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTi=ZzvCNef7r81WZdbZsiSquYhDrnYAY73r4mo+t@mail.gmail.com>
Date: Sat, 11 Sep 2010 00:47:18 +0800
From: Changli Gao <xiaosuo@...il.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>,
Oliver Hartkopp <socketcan@...tkopp.net>,
"Michael S. Tsirkin" <mst@...hat.com>, netdev@...r.kernel.org
Subject: Re: [PATCH] net: af_packet: don't call tpacket_destruct_skb() until
the skb is sent out
On Fri, Sep 10, 2010 at 10:26 PM, Eric Dumazet <eric.dumazet@...il.com> wrote:
> Le vendredi 10 septembre 2010 à 21:22 +0800, Changli Gao a écrit :
>> Since skb->destructor() is used to account socket memory, and maybe called
>> before the skb is sent out, a corrupt skb maybe sent out finally.
>>
>> A new destructor is added into structure skb_shared_info(), and it won't
>> be called until the last reference to the data of a skb is put. af_packet
>> uses this destructor instead.
>>
>
> Hi Changli
>
>> static void tpacket_destruct_skb(struct sk_buff *skb)
>> {
>> - struct packet_sock *po = pkt_sk(skb->sk);
>> - void *ph;
>> -
>> - BUG_ON(skb == NULL);
>> + struct tpacket_destructor_arg *arg = skb_shinfo(skb)->destructor_arg;
>> + struct packet_sock *po = pkt_sk(arg->sk);
>> + void *ph = arg->ph;
>>
>> if (likely(po->tx_ring.pg_vec)) {
>> - ph = skb_shinfo(skb)->destructor_arg;
>> BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
>> BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
>> atomic_dec(&po->tx_ring.pending);
>> __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
>> }
>>
>> + skb->sk = arg->sk;
>> sock_wfree(skb);
>
> Are you sure sock_wfree(skb) is still needed ?
sock_wfree(skb) is also used to wake up the users who sleep on
poll(2). If sock_wfree(skb) is moved into skb->destructor(), and
called before skb is sent out, pollers will be waked up without
POLLOUT, and since the later skb_shinfo(skb)->destructor() doesn't
wake up the pollers, POLLOUT events will be lost, and the poller will
be blocked forever.
>
>
>> + kfree(arg);
>
> this new kmalloc()/kfree() for each sent packet wont please the guys
> using af_packet/mmap interface...
Embed these two pointers into skb_shared_info? It may slow the others.
>
>> }
>>
>> static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>> @@ -862,7 +867,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>> skb->dev = dev;
>> skb->priority = po->sk.sk_priority;
>> skb->mark = po->sk.sk_mark;
>> - skb_shinfo(skb)->destructor_arg = ph.raw;
>>
>> switch (po->tp_version) {
>> case TPACKET_V2:
>> @@ -884,9 +888,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>> to_write = tp_len;
>>
>> if (sock->type == SOCK_DGRAM) {
>> - err = dev_hard_header(skb, dev, ntohs(proto), addr,
>> - NULL, tp_len);
>> - if (unlikely(err < 0))
>> + if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
>> + NULL, tp_len) < 0))
>> return -EINVAL;
>> } else if (dev->hard_header_len) {
>> /* net device doesn't like empty head */
>> @@ -897,8 +900,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>> }
>>
>> skb_push(skb, dev->hard_header_len);
>> - err = skb_store_bits(skb, 0, data,
>> - dev->hard_header_len);
>> + err = skb_store_bits(skb, 0, data, dev->hard_header_len);
>> if (unlikely(err))
>> return err;
>>
>> @@ -906,7 +908,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>> to_write -= dev->hard_header_len;
>> }
>>
>> - err = -EFAULT;
>> page = virt_to_page(data);
>> offset = offset_in_page(data);
>> len_max = PAGE_SIZE - offset;
>> @@ -994,6 +995,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>> size_max = dev->mtu + reserve;
>>
>> do {
>> + struct tpacket_destructor_arg *arg;
>> +
>> ph = packet_current_frame(po, &po->tx_ring,
>> TP_STATUS_SEND_REQUEST);
>>
>> @@ -1028,7 +1031,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>> }
>> }
>>
>> - skb->destructor = tpacket_destruct_skb;
>> + arg = kmalloc(sizeof(*arg), GFP_KERNEL);
>> + if (unlikely(arg == NULL)) {
>> + err = -ENOBUFS;
>> + goto out_status;
>> + }
>> + arg->sk = &po->sk;
>> + arg->ph = ph;
>> + skb_shinfo(skb)->destructor_arg = arg;
>> + skb->destructor = NULL;
>
> why setting skb->destructor to NULL here ?
Let skb_shinfo(skb)->destructor() do all the things which used to be
done by skb->destructor().
>
>> + skb_shinfo(skb)->destructor = tpacket_destruct_skb;
>> __packet_set_status(po, ph, TP_STATUS_SENDING);
>> atomic_inc(&po->tx_ring.pending);
>>
>
> I dont yet understand how this can prevent af_unix module being unloaded
> while packets are in flight
This issue isn't addressed, and I think it should be fixed in a separate patch.
>
> I believe sock_wfree() should be avoided (since early orphaning occurs),
> to reduce number of atomic ops to the minimum.
>
> af_packet/mmap users want fast operations, we should not use
> sock_wfree() for them, because max number of in flight packets is known
> (tx ring buffer)
>
>
But the users rely on the kernel to inform them there is available
frame for use.
--
Regards,
Changli Gao(xiaosuo@...il.com)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists