[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTikLgHvtpCtBTKmJZBwixmZDHjRjGb1c59oAemli@mail.gmail.com>
Date: Thu, 6 May 2010 08:12:57 -0700
From: Tom Herbert <therbert@...gle.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: David Miller <davem@...emloft.net>, netdev <netdev@...r.kernel.org>
Subject: Re: [PATCH net-next-2.6] net: Consistent skb timestamping
On Thu, May 6, 2010 at 5:01 AM, Eric Dumazet <eric.dumazet@...il.com> wrote:
> With RPS inclusion, skb timestamping is not consistent in RX path.
>
> If netif_receive_skb() is used, its deferred after RPS dispatch.
>
> If netif_rx() is used, its done before RPS dispatch.
>
> This can give strange tcpdump timestamps results.
>
> I think timestamping should be done as soon as possible in the receive
> path, to get meaningful values (ie timestamps taken at the time packet
> was delivered by NIC driver to our stack), even if NAPI already can
> defer timestamping a bit (RPS can help to reduce the gap)
>
The counter argument to this is that it moves another thing into the
serialized path for networking which slows everyone down. I'm not
concerned about when tcpdump is running since performance will suck
anyway, but what is bad is if any single socket in the system turns on
SO_TIMESTAMP, overhead is incurred on *every* packet. This happens
regardless of whether the application ever actually gets a timestamp,
or even whether timestamps are supported by the protocol (try setting
SO_TIMESTAMP on a TCP socket ;-) ). I'm contemplating changing
SO_TIMESTAMP to not enable global timestamps, but only take the
timestamp for a packet once the socket is identified and the timestamp
flag is set (this is the technique done in FreeBSD and Solaris, so I
believe the external semantics would still be valid).
> Remove timestamping from __netif_receive_skb, and add it to
> netif_receive_skb(), before RPS.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
> ---
> net/core/dev.c | 46 ++++++++++++++++++++++++++--------------------
> 1 file changed, 26 insertions(+), 20 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 36d53be..3278003 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
> }
> EXPORT_SYMBOL(net_disable_timestamp);
>
> -static inline void net_timestamp(struct sk_buff *skb)
> +static inline void net_timestamp_set(struct sk_buff *skb)
> {
> if (atomic_read(&netstamp_needed))
> __net_timestamp(skb);
> @@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
> skb->tstamp.tv64 = 0;
> }
>
> +static inline void net_timestamp_check(struct sk_buff *skb)
> +{
> + if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
> + __net_timestamp(skb);
> +}
> +
> /**
> * dev_forward_skb - loopback an skb to another netif
> *
> @@ -1509,9 +1515,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
>
> #ifdef CONFIG_NET_CLS_ACT
> if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
> - net_timestamp(skb);
> + net_timestamp_set(skb);
> #else
> - net_timestamp(skb);
> + net_timestamp_set(skb);
> #endif
>
> rcu_read_lock();
> @@ -2458,8 +2464,7 @@ int netif_rx(struct sk_buff *skb)
> if (netpoll_rx(skb))
> return NET_RX_DROP;
>
> - if (!skb->tstamp.tv64)
> - net_timestamp(skb);
> + net_timestamp_check(skb);
>
> #ifdef CONFIG_RPS
> {
> @@ -2780,9 +2785,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
> int ret = NET_RX_DROP;
> __be16 type;
>
> - if (!skb->tstamp.tv64)
> - net_timestamp(skb);
> -
> if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
> return NET_RX_SUCCESS;
>
> @@ -2899,23 +2901,27 @@ out:
> */
> int netif_receive_skb(struct sk_buff *skb)
> {
> + net_timestamp_check(skb);
> +
> #ifdef CONFIG_RPS
> - struct rps_dev_flow voidflow, *rflow = &voidflow;
> - int cpu, ret;
> + {
> + struct rps_dev_flow voidflow, *rflow = &voidflow;
> + int cpu, ret;
>
> - rcu_read_lock();
> + rcu_read_lock();
>
> - cpu = get_rps_cpu(skb->dev, skb, &rflow);
> + cpu = get_rps_cpu(skb->dev, skb, &rflow);
>
> - if (cpu >= 0) {
> - ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
> - rcu_read_unlock();
> - } else {
> - rcu_read_unlock();
> - ret = __netif_receive_skb(skb);
> - }
> + if (cpu >= 0) {
> + ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
> + rcu_read_unlock();
> + } else {
> + rcu_read_unlock();
> + ret = __netif_receive_skb(skb);
> + }
>
> - return ret;
> + return ret;
> + }
> #else
> return __netif_receive_skb(skb);
> #endif
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists