lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTikLgHvtpCtBTKmJZBwixmZDHjRjGb1c59oAemli@mail.gmail.com>
Date:	Thu, 6 May 2010 08:12:57 -0700
From:	Tom Herbert <therbert@...gle.com>
To:	Eric Dumazet <eric.dumazet@...il.com>
Cc:	David Miller <davem@...emloft.net>, netdev <netdev@...r.kernel.org>
Subject: Re: [PATCH net-next-2.6] net: Consistent skb timestamping

On Thu, May 6, 2010 at 5:01 AM, Eric Dumazet <eric.dumazet@...il.com> wrote:
> With RPS inclusion, skb timestamping is not consistent in RX path.
>
> If netif_receive_skb() is used, its deferred after RPS dispatch.
>
> If netif_rx() is used, its done before RPS dispatch.
>
> This can give strange tcpdump timestamps results.
>
> I think timestamping should be done as soon as possible in the receive
> path, to get meaningful values (ie timestamps taken at the time packet
> was delivered by NIC driver to our stack), even if NAPI already can
> defer timestamping a bit (RPS can help to reduce the gap)
>
The counter argument to this is that it moves another thing into the
serialized path for networking which slows everyone down.  I'm not
concerned about when tcpdump is running since performance will suck
anyway, but what is bad is if any single socket in the system turns on
SO_TIMESTAMP, overhead is incurred on *every* packet.  This happens
regardless of whether the application ever actually gets a timestamp,
or even whether timestamps are supported by the protocol (try setting
SO_TIMESTAMP on a TCP socket ;-) ).  I'm contemplating changing
SO_TIMESTAMP to not enable global timestamps, but only take the
timestamp for a packet once the socket is identified and the timestamp
flag is set (this is the technique done in FreeBSD and Solaris, so I
believe the external semantics would still be valid).

> Remove timestamping from __netif_receive_skb, and add it to
> netif_receive_skb(), before RPS.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
> ---
>  net/core/dev.c |   46 ++++++++++++++++++++++++++--------------------
>  1 file changed, 26 insertions(+), 20 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 36d53be..3278003 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
>  }
>  EXPORT_SYMBOL(net_disable_timestamp);
>
> -static inline void net_timestamp(struct sk_buff *skb)
> +static inline void net_timestamp_set(struct sk_buff *skb)
>  {
>        if (atomic_read(&netstamp_needed))
>                __net_timestamp(skb);
> @@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
>                skb->tstamp.tv64 = 0;
>  }
>
> +static inline void net_timestamp_check(struct sk_buff *skb)
> +{
> +       if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
> +               __net_timestamp(skb);
> +}
> +
>  /**
>  * dev_forward_skb - loopback an skb to another netif
>  *
> @@ -1509,9 +1515,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
>
>  #ifdef CONFIG_NET_CLS_ACT
>        if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
> -               net_timestamp(skb);
> +               net_timestamp_set(skb);
>  #else
> -       net_timestamp(skb);
> +       net_timestamp_set(skb);
>  #endif
>
>        rcu_read_lock();
> @@ -2458,8 +2464,7 @@ int netif_rx(struct sk_buff *skb)
>        if (netpoll_rx(skb))
>                return NET_RX_DROP;
>
> -       if (!skb->tstamp.tv64)
> -               net_timestamp(skb);
> +       net_timestamp_check(skb);
>
>  #ifdef CONFIG_RPS
>        {
> @@ -2780,9 +2785,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
>        int ret = NET_RX_DROP;
>        __be16 type;
>
> -       if (!skb->tstamp.tv64)
> -               net_timestamp(skb);
> -
>        if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
>                return NET_RX_SUCCESS;
>
> @@ -2899,23 +2901,27 @@ out:
>  */
>  int netif_receive_skb(struct sk_buff *skb)
>  {
> +       net_timestamp_check(skb);
> +
>  #ifdef CONFIG_RPS
> -       struct rps_dev_flow voidflow, *rflow = &voidflow;
> -       int cpu, ret;
> +       {
> +               struct rps_dev_flow voidflow, *rflow = &voidflow;
> +               int cpu, ret;
>
> -       rcu_read_lock();
> +               rcu_read_lock();
>
> -       cpu = get_rps_cpu(skb->dev, skb, &rflow);
> +               cpu = get_rps_cpu(skb->dev, skb, &rflow);
>
> -       if (cpu >= 0) {
> -               ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
> -               rcu_read_unlock();
> -       } else {
> -               rcu_read_unlock();
> -               ret = __netif_receive_skb(skb);
> -       }
> +               if (cpu >= 0) {
> +                       ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
> +                       rcu_read_unlock();
> +               } else {
> +                       rcu_read_unlock();
> +                       ret = __netif_receive_skb(skb);
> +               }
>
> -       return ret;
> +               return ret;
> +       }
>  #else
>        return __netif_receive_skb(skb);
>  #endif
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ