[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4AC34F0D.1060101@gmail.com>
Date: Wed, 30 Sep 2009 14:29:01 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Neil Horman <nhorman@...driver.com>
CC: netdev@...r.kernel.org, davem@...emloft.net
Subject: Re: [PATCH] af_packet: add interframe drop cmsg (v4)
Neil Horman a écrit :
> Ok Version 4 of this patch. Another round of notes from Eric incorporated:
>
> Change Notes:
>
> 1) Modified the gap overflow tracking so that we only check for overflow when we
> are going to successfully enqueue a packet to the socket receive queue. This
> implies that we only print a warning message in the event that we have enough
> space to enqueue frames, so we aren't wasting time printing warnings when we're
> memory constrained.
>
> Neil
> ======================================================================
>
>
> Add Ancilliary data to better represent loss information
>
> I've had a few requests recently to provide more detail regarding frame loss
> during an AF_PACKET packet capture session. Specifically the requestors want to
> see where in a packet sequence frames were lost, i.e. they want to see that 40
> frames were lost between frames 302 and 303 in a packet capture file. In order
> to do this we need:
>
> 1) The kernel to export this data to user space
> 2) The applications to make use of it
>
> This patch addresses item (1). It does this by doing the following:
>
> A) attaching ancilliary data to any skb enqueued to a socket recieve queue for
> which frames were lost between it and the previously enqueued frame. Note I use
> a ring buffer with a correlator value (the skb pointer) to do this. This was
> done because the skb->cb array is exhausted already for AF_PACKET
>
> B) For any frame dequeued that has ancilliary data in the ring buffer (as
> determined by the correlator value), we add a cmsg structure to the msghdr that
> gets copied to user space, this cmsg structure is of cmsg_level AF_PACKET, and
> cmsg_type PACKET_GAPDATA. It contains a u32 value which counts the number of
> frames lost between the reception of the frame being currently recevied and the
> frame most recently preceding it. Note this creates a situation in which if we
> have packet loss followed immediately by a socket close operation we might miss
> some gap information. This situation is covered by the use of the
> PACKET_AUXINFO socket option, which provides total loss stats (from which the
> final gap can be computed).
>
> I've tested this patch myself, and it works well.
>
> Signed-off-by: Neil Horman <nhorman@...driver.com>
>
>
> include/linux/if_packet.h | 2 ++
> net/packet/af_packet.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
> index dea7d6b..e5d200f 100644
> --- a/include/linux/if_packet.h
> +++ b/include/linux/if_packet.h
> @@ -48,11 +48,13 @@ struct sockaddr_ll
> #define PACKET_RESERVE 12
> #define PACKET_TX_RING 13
> #define PACKET_LOSS 14
> +#define PACKET_GAPDATA 15
>
> struct tpacket_stats
> {
> unsigned int tp_packets;
> unsigned int tp_drops;
> + unsigned int tp_gap;
> };
>
> struct tpacket_auxdata
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index d3d52c6..678a269 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -207,7 +207,8 @@ struct packet_sock {
> };
>
> struct packet_skb_cb {
> - unsigned int origlen;
> + unsigned short origlen;
> + unsigned short gap;
> union {
> struct sockaddr_pkt pkt;
> struct sockaddr_ll ll;
> @@ -524,6 +525,34 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
> }
>
> /*
> + * If we've lost frames since the last time we queued one to the
> + * sk_receive_queue, we need to record it here.
> + * This must be called under the protection of the socket lock
> + * to prevent racing with other softirqs and user space
> + */
> +static inline void record_packet_gap(struct sk_buff *skb,
> + struct packet_sock *po)
> +{
> + if ((po->stats.tp_gap > 0xffff) && net_ratelimit()) {
> + pr_warning("Packet socket overflowed %d times\n",
> + po->stats.tp_gap - 0xffff);
> + po->stats.tp_gap = 0xffff;
> + }
Hmm... in case net_ratelimit() is false we dont want to report overflowed
value to user (especially if tp_gap = 0x10000 (->PACKET_SKB_CB(skb)->gap = 0 !)
Only the pr_warning() should be ratelimited, not the caping itself
So :
if (po->stats.tp_gap > 0xffff) {
if (net_ratelimit())
pr_warning("Packet socket overflowed %d times\n",
po->stats.tp_gap - 0xffff);
po->stats.tp_gap = 0xffff;
}
PACKET_SKB_CB(skb)->gap = po->stats.tp_gap;
po->stats.tp_gap = 0;
> +
> + PACKET_SKB_CB(skb)->gap = po->stats.tp_gap;
> + po->stats.tp_gap = 0;
> + return;
You dont need this return; clause in this void function.
>
+
> +}
> +
> +static inline __u16 check_packet_gap(struct sk_buff *skb)
> +{
> + return PACKET_SKB_CB(skb)->gap;
> +}
> +
> +#define INC_GAP_STAT(po) ((po)->stats.tp_gap++)
> +
> +/*
> This function makes lazy skb cloning in hope that most of packets
> are discarded by BPF.
>
> @@ -612,7 +641,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>
> sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
>
> - PACKET_SKB_CB(skb)->origlen = skb->len;
> + PACKET_SKB_CB(skb)->origlen = (unsigned short)skb->len;
> + if (unlikely((skb->len > PACKET_SKB_CB(skb)->origlen) &&
> + net_ratelimit())) {
> + pr_warning("SKB len overflowed aux data for af_packet!\n");
> + }
>
> if (pskb_trim(skb, snaplen))
> goto drop_n_acct;
> @@ -626,6 +659,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>
> spin_lock(&sk->sk_receive_queue.lock);
> po->stats.tp_packets++;
> + record_packet_gap(skb, po);
> __skb_queue_tail(&sk->sk_receive_queue, skb);
> spin_unlock(&sk->sk_receive_queue.lock);
> sk->sk_data_ready(sk, skb->len);
> @@ -634,6 +668,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
> drop_n_acct:
> spin_lock(&sk->sk_receive_queue.lock);
> po->stats.tp_drops++;
> + INC_GAP_STAT(po);
> spin_unlock(&sk->sk_receive_queue.lock);
>
> drop_n_restore:
> @@ -811,6 +846,7 @@ drop:
>
> ring_is_full:
> po->stats.tp_drops++;
> + INC_GAP_STAT(po);
> spin_unlock(&sk->sk_receive_queue.lock);
>
> sk->sk_data_ready(sk, 0);
> @@ -1418,6 +1454,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
> struct sk_buff *skb;
> int copied, err;
> struct sockaddr_ll *sll;
> + __u16 gap;
>
> err = -EINVAL;
> if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
> @@ -1496,6 +1533,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
> put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
> }
>
> + gap = check_packet_gap(skb);
> + if (gap)
> + put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u16), &gap);
> +
> /*
> * Free or return the buffer as appropriate. Again this
> * hides all the races and re-entrancy issues from us.
Thanks
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists