[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4ACD3AC8.608@gmail.com>
Date: Thu, 08 Oct 2009 03:05:12 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Neil Horman <nhorman@...driver.com>
CC: netdev@...r.kernel.org, davem@...emloft.net, socketcan@...tkopp.net
Subject: Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
Neil Horman a écrit :
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7626b6a..8bd366f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -306,6 +306,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> skb_len = skb->len;
>
> skb_queue_tail(&sk->sk_receive_queue, skb);
> + skb->dropcount = atomic_read(&sk->sk_drops);
No, skb was given to skb_queue_tail(); you are not allowed to touch it now,
as another CPU might already have consumed it.
You had better do:
struct sk_buff_head *list = &sk->sk_receive_queue;
spin_lock_irqsave(&list->lock, flags);
skb->dropcount = atomic_read(&sk->sk_drops); // should be done under lock protection
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);
>
> if (!sock_flag(sk, SOCK_DEAD))
> sk->sk_data_ready(sk, skb_len);
> @@ -702,6 +703,12 @@ set_rcvbuf:
>
> /* We implement the SO_SNDLOWAT etc to
> not be settable (1003.1g 5.3) */
> + case SO_RXQ_OVFL:
> + if (valbool)
> + set_bit(SOCK_RXQ_OVFL, &sock->flags);
> + else
> + clear_bit(SOCK_RXQ_OVFL, &sock->flags);
> + break;
> default:
> ret = -ENOPROTOOPT;
> break;
> @@ -901,6 +908,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
> v.val = sk->sk_mark;
> break;
>
> + case SO_RXQ_OVFL:
> + v.val = test_bit(SOCK_RXQ_OVFL, &sock->flags);
> + break;
> +
> default:
> return -ENOPROTOOPT;
> }
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index d7ecca0..920ae1e 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -617,6 +617,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
> if (pskb_trim(skb, snaplen))
> goto drop_n_acct;
>
> + skb->dropcount = atomic_read(&sk->sk_drops);
This should be done a little bit later, right before "__skb_queue_tail(&sk->sk_receive_queue, skb);"
> skb_set_owner_r(skb, sk);
> skb->dev = NULL;
> skb_dst_drop(skb);
> @@ -634,6 +635,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
> drop_n_acct:
> spin_lock(&sk->sk_receive_queue.lock);
> po->stats.tp_drops++;
> + atomic_inc(&sk->sk_drops);
> spin_unlock(&sk->sk_receive_queue.lock);
You could replace this block of four lines with: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
>
> drop_n_restore:
> diff --git a/net/socket.c b/net/socket.c
> index 7565536..ad157a3 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -673,6 +673,12 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
> {
> int err;
> struct sock_iocb *si = kiocb_to_siocb(iocb);
> + struct sk_buff *skb;
> + int rc;
> + struct sock *sk = sock->sk;
> + unsigned long cpu_flags;
> + __u32 gap = 0;
> + int check_drops = test_bit(SOCK_RXQ_OVFL, &sock->flags);
>
> si->sock = sock;
> si->scm = NULL;
> @@ -684,7 +690,21 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
> if (err)
> return err;
>
> - return sock->ops->recvmsg(iocb, sock, msg, size, flags);
> + if (check_drops) {
> + skb = skb_recv_datagram(sk, flags|MSG_PEEK,
> + flags & MSG_DONTWAIT, &err);
Ouch, this is too expensive, please find another way :)
> + if (skb) {
> + gap = skb->dropcount;
> + consume_skb(skb);
> + }
> + }
> +
> + rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
> +
> + if (check_drops && (rc > 0))
&& gap != 0
> + put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
> +
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists