[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090930170410.GA28936@hmsreliant.think-freely.org>
Date: Wed, 30 Sep 2009 13:04:10 -0400
From: Neil Horman <nhorman@...driver.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: netdev@...r.kernel.org, davem@...emloft.net, nhorman@...driver.com
Subject: Re: [PATCH] af_packet: add interframe drop cmsg (v5)
Gah, you're right, I'm sorry. Version 5 attached
Change Notes:
1) Fix record_packet_gap so that only the warning message is rate limited, not
the actual capping of the gap value
==========================================================
Add Ancilliary data to better represent loss information
I've had a few requests recently to provide more detail regarding frame loss
during an AF_PACKET packet capture session. Specifically the requestors want to
see where in a packet sequence frames were lost, i.e. they want to see that 40
frames were lost between frames 302 and 303 in a packet capture file. In order
to do this we need:
1) The kernel to export this data to user space
2) The applications to make use of it
This patch addresses item (1). It does this by doing the following:
A) attaching ancilliary data to any skb enqueued to a socket recieve queue for
which frames were lost between it and the previously enqueued frame. Note I use
a ring buffer with a correlator value (the skb pointer) to do this. This was
done because the skb->cb array is exhausted already for AF_PACKET
B) For any frame dequeued that has ancilliary data in the ring buffer (as
determined by the correlator value), we add a cmsg structure to the msghdr that
gets copied to user space, this cmsg structure is of cmsg_level AF_PACKET, and
cmsg_type PACKET_GAPDATA. It contains a u32 value which counts the number of
frames lost between the reception of the frame being currently recevied and the
frame most recently preceding it. Note this creates a situation in which if we
have packet loss followed immediately by a socket close operation we might miss
some gap information. This situation is covered by the use of the
PACKET_AUXINFO socket option, which provides total loss stats (from which the
final gap can be computed).
I've tested this patch myself, and it works well.
Signed-off-by: Neil Horman <nhorman@...driver.com>
include/linux/if_packet.h | 2 ++
net/packet/af_packet.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index dea7d6b..e5d200f 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -48,11 +48,13 @@ struct sockaddr_ll
#define PACKET_RESERVE 12
#define PACKET_TX_RING 13
#define PACKET_LOSS 14
+#define PACKET_GAPDATA 15
struct tpacket_stats
{
unsigned int tp_packets;
unsigned int tp_drops;
+ unsigned int tp_gap;
};
struct tpacket_auxdata
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d3d52c6..c903249 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -207,7 +207,8 @@ struct packet_sock {
};
struct packet_skb_cb {
- unsigned int origlen;
+ unsigned short origlen;
+ unsigned short gap;
union {
struct sockaddr_pkt pkt;
struct sockaddr_ll ll;
@@ -524,6 +525,35 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
}
/*
+ * If we've lost frames since the last time we queued one to the
+ * sk_receive_queue, we need to record it here.
+ * This must be called under the protection of the socket lock
+ * to prevent racing with other softirqs and user space
+ */
+static inline void record_packet_gap(struct sk_buff *skb,
+ struct packet_sock *po)
+{
+ if (po->stats.tp_gap > 0xffff) {
+ if (net_ratelimit())
+ pr_warning("Packet socket overflowed %d times\n",
+ po->stats.tp_gap - 0xffff);
+ po->stats.tp_gap = 0xffff;
+ }
+
+ PACKET_SKB_CB(skb)->gap = po->stats.tp_gap;
+ po->stats.tp_gap = 0;
+ return;
+
+}
+
+static inline __u16 check_packet_gap(struct sk_buff *skb)
+{
+ return PACKET_SKB_CB(skb)->gap;
+}
+
+#define INC_GAP_STAT(po) ((po)->stats.tp_gap++)
+
+/*
This function makes lazy skb cloning in hope that most of packets
are discarded by BPF.
@@ -612,7 +642,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
- PACKET_SKB_CB(skb)->origlen = skb->len;
+ PACKET_SKB_CB(skb)->origlen = (unsigned short)skb->len;
+ if (unlikely((skb->len > PACKET_SKB_CB(skb)->origlen) &&
+ net_ratelimit())) {
+ pr_warning("SKB len overflowed aux data for af_packet!\n");
+ }
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
@@ -626,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock);
po->stats.tp_packets++;
+ record_packet_gap(skb, po);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk, skb->len);
@@ -634,6 +669,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
drop_n_acct:
spin_lock(&sk->sk_receive_queue.lock);
po->stats.tp_drops++;
+ INC_GAP_STAT(po);
spin_unlock(&sk->sk_receive_queue.lock);
drop_n_restore:
@@ -811,6 +847,7 @@ drop:
ring_is_full:
po->stats.tp_drops++;
+ INC_GAP_STAT(po);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk, 0);
@@ -1418,6 +1455,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sk_buff *skb;
int copied, err;
struct sockaddr_ll *sll;
+ __u16 gap;
err = -EINVAL;
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1496,6 +1534,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
}
+ gap = check_packet_gap(skb);
+ if (gap)
+ put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u16), &gap);
+
/*
* Free or return the buffer as appropriate. Again this
* hides all the races and re-entrancy issues from us.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists