[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <49E85AFD.6080407@cosmosbay.com>
Date: Fri, 17 Apr 2009 12:33:33 +0200
From: Eric Dumazet <dada1@...mosbay.com>
To: David Miller <davem@...emloft.net>
CC: netdev@...r.kernel.org
Subject: Re: [PATCH] loopback: better handling of packet drops
Eric Dumazet a écrit :
> Eric Dumazet a écrit :
>> David Miller a écrit :
>>> From: Eric Dumazet <dada1@...mosbay.com>
>>> Date: Fri, 17 Apr 2009 10:56:57 +0200
>>>
>>>> We can in some situations drop packets in netif_rx()
>>>>
>>>> loopback driver does not report these (unlikely) drops to its stats,
>>>> and incorrectly change packets/bytes counts.
>>>>
>>>> After this patch applied, "ifconfig lo" can reports these drops as in :
>>>>
>>>> # ifconfig lo
>>>> lo Link encap:Local Loopback
>>>> inet addr:127.0.0.1 Mask:255.0.0.0
>>>> UP LOOPBACK RUNNING MTU:16436 Metric:1
>>>> RX packets:692562900 errors:0 dropped:0 overruns:0 frame:0
>>>> TX packets:692562900 errors:3228 dropped:3228 overruns:0 carrier:0
>>>> collisions:0 txqueuelen:0
>>>> RX bytes:2865674174 (2.6 GiB) TX bytes:2865674174 (2.6 GiB)
>>>>
>>>> I chose to reflect those errors only in tx_dropped/tx_errors, and not mirror
>>>> these errors in rx_dropped/rx_errors.
>>>>
>>>> Signed-off-by: Eric Dumazet <dada1@...mosbay.com>
>>> Well, logically the receive is what failed, not the transmit.
>>>
>>> I think it's therefore misleading to count it as a TX drop.
>>>
>>> Do you feel strongly about this?
>> Not at all, but my plan was to go a little bit further, i.e. being able to
>> return from loopback_xmit() with a non null value.
>>
>
> Something like this :
I just noticed NETDEV_TX_BUSY & NETDEV_TX_OK, so here is an updated version
using these macros instead of 0 & 1
[PATCH] loopback: better handling of packet drops
We can in some situations drop packets in netif_rx()
loopback driver does not report these (unlikely) drops to its stats,
and incorrectly changes packets/bytes counts. Also upper layers are
not warned of these transmit failures.
After this patch is applied, "ifconfig lo" can report these drops as in:
# ifconfig lo
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:692562900 errors:0 dropped:0 overruns:0 frame:0
TX packets:692562900 errors:3228 dropped:3228 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:2865674174 (2.6 GiB) TX bytes:2865674174 (2.6 GiB)
Moreover, loopback_xmit() can now return to its caller the indication that
packet was not transmitted for better queue management and error handling.
I chose to reflect those errors only in tx_dropped/tx_errors, and not mirror
them in rx_dropped/rx_errors.
Splitting netif_rx() with a helper function boosts tbench performance by 1%,
because we can avoid two tests (about netpoll and timestamping)
Tested with /proc/sys/net/core/netdev_max_backlog set to 0, tbench
can run at full speed even with some 'losses' on loopback. No more
tcp stalls...
Signed-off-by: Eric Dumazet <dada1@...mosbay.com>
---
drivers/net/loopback.c | 24 +++++++++---
include/linux/netdevice.h | 1
net/core/dev.c | 68 +++++++++++++++++++++++-------------
3 files changed, 62 insertions(+), 31 deletions(-)
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index b7d438a..101a3bc 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -62,6 +62,7 @@
struct pcpu_lstats {
unsigned long packets;
unsigned long bytes;
+ unsigned long drops;
};
/*
@@ -71,20 +72,25 @@ struct pcpu_lstats {
static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct pcpu_lstats *pcpu_lstats, *lb_stats;
+ int len;
skb_orphan(skb);
- skb->protocol = eth_type_trans(skb,dev);
+ skb->protocol = eth_type_trans(skb, dev);
/* it's OK to use per_cpu_ptr() because BHs are off */
pcpu_lstats = dev->ml_priv;
lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id());
- lb_stats->bytes += skb->len;
- lb_stats->packets++;
- netif_rx(skb);
+ len = skb->len;
+ if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
+ lb_stats->bytes += len;
+ lb_stats->packets++;
+ return NETDEV_TX_OK;
+ }
+ lb_stats->drops++;
- return 0;
+ return NETDEV_TX_BUSY;
}
static struct net_device_stats *loopback_get_stats(struct net_device *dev)
@@ -93,6 +99,7 @@ static struct net_device_stats *loopback_get_stats(struct net_device *dev)
struct net_device_stats *stats = &dev->stats;
unsigned long bytes = 0;
unsigned long packets = 0;
+ unsigned long drops = 0;
int i;
pcpu_lstats = dev->ml_priv;
@@ -102,11 +109,14 @@ static struct net_device_stats *loopback_get_stats(struct net_device *dev)
lb_stats = per_cpu_ptr(pcpu_lstats, i);
bytes += lb_stats->bytes;
packets += lb_stats->packets;
+ drops += lb_stats->drops;
}
stats->rx_packets = packets;
stats->tx_packets = packets;
- stats->rx_bytes = bytes;
- stats->tx_bytes = bytes;
+ stats->tx_dropped = drops;
+ stats->tx_errors = drops;
+ stats->rx_bytes = bytes;
+ stats->tx_bytes = bytes;
return stats;
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e7783f..c60e250 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1430,6 +1430,7 @@ extern void dev_kfree_skb_irq(struct sk_buff *skb);
extern void dev_kfree_skb_any(struct sk_buff *skb);
#define HAVE_NETIF_RX 1
+extern int __netif_rx(struct sk_buff *skb);
extern int netif_rx(struct sk_buff *skb);
extern int netif_rx_ni(struct sk_buff *skb);
#define HAVE_NETIF_RECEIVE_SKB 1
diff --git a/net/core/dev.c b/net/core/dev.c
index 343883f..8ae3f19 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1909,6 +1909,44 @@ int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+/*
+ * helper function called from netif_rx() or loopback_xmit()
+ */
+int __netif_rx(struct sk_buff *skb)
+{
+ struct softnet_data *queue;
+ unsigned long flags;
+
+ /*
+ * The code is rearranged so that the path is the most
+ * short when CPU is congested, but is still operating.
+ */
+ local_irq_save(flags);
+ queue = &__get_cpu_var(softnet_data);
+
+ __get_cpu_var(netdev_rx_stat).total++;
+ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+ if (queue->input_pkt_queue.qlen) {
+enqueue:
+ __skb_queue_tail(&queue->input_pkt_queue, skb);
+ local_irq_restore(flags);
+ return NET_RX_SUCCESS;
+ }
+
+ napi_schedule(&queue->backlog);
+ goto enqueue;
+ }
+
+ __get_cpu_var(netdev_rx_stat).dropped++;
+ local_irq_restore(flags);
+ /*
+ * Dont free skb here.
+ * netif_rx() will call kfree_skb(skb)
+ * loopback_xmit() will not free it but return an error to its caller
+ */
+ return NET_RX_DROP;
+}
+
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
@@ -1928,6 +1966,7 @@ int netif_rx(struct sk_buff *skb)
{
struct softnet_data *queue;
unsigned long flags;
+ int ret;
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
@@ -1936,32 +1975,14 @@ int netif_rx(struct sk_buff *skb)
if (!skb->tstamp.tv64)
net_timestamp(skb);
- /*
- * The code is rearranged so that the path is the most
- * short when CPU is congested, but is still operating.
- */
- local_irq_save(flags);
- queue = &__get_cpu_var(softnet_data);
-
- __get_cpu_var(netdev_rx_stat).total++;
- if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
- if (queue->input_pkt_queue.qlen) {
-enqueue:
- __skb_queue_tail(&queue->input_pkt_queue, skb);
- local_irq_restore(flags);
- return NET_RX_SUCCESS;
- }
-
- napi_schedule(&queue->backlog);
- goto enqueue;
- }
+ ret = __netif_rx(skb);
- __get_cpu_var(netdev_rx_stat).dropped++;
- local_irq_restore(flags);
+ if (unlikely(ret == NET_RX_DROP))
+ kfree_skb(skb);
- kfree_skb(skb);
- return NET_RX_DROP;
+ return ret;
}
+EXPORT_SYMBOL(netif_rx);
int netif_rx_ni(struct sk_buff *skb)
{
@@ -5307,7 +5328,6 @@ EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
-EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists