lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAF=yD-L8Ay4sFNjQaTqNJoqZFF4aaWyTo4pKj4xiS+snG6R++A@mail.gmail.com>
Date:   Mon, 4 Dec 2017 19:16:45 -0500
From:   Willem de Bruijn <willemdebruijn.kernel@...il.com>
To:     Jason Wang <jasowang@...hat.com>
Cc:     Network Development <netdev@...r.kernel.org>,
        LKML <linux-kernel@...r.kernel.org>,
        "Michael S. Tsirkin" <mst@...hat.com>,
        Tom Herbert <tom@...bertland.com>, aconole@...hat.com,
        wexu@...hat.com
Subject: Re: [PATCH net-next V3] tun: add eBPF based queue selection method

On Mon, Dec 4, 2017 at 4:31 AM, Jason Wang <jasowang@...hat.com> wrote:
> This patch introduces an eBPF based queue selection method. With this,
> the policy could be offloaded to userspace completely through a new
> ioctl TUNSETSTEERINGEBPF.
>
> Signed-off-by: Jason Wang <jasowang@...hat.com>
> ---

> +static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> +{
> +       struct tun_steering_prog *prog;
> +       u16 ret = 0;
> +
> +       prog = rcu_dereference(tun->steering_prog);
> +       if (prog)
> +               ret = bpf_prog_run_clear_cb(prog->prog, skb);

This dereferences tun->steering_prog for a second time. It is safe
in this load balancing case to assign a few extra packets to queue 0.
But the issue can also be avoided by replacing the function with a
direct call in tun_net_xmit:

       struct tun_steering_prog *s = rcu_dereference(tun->steering_prog);
       if (s)
               ret = bpf_prog_run_clear_cb(s->prog, skb) % tun->numqueues;

>  /* Net device start xmit */
> -static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> +static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>  {
> -       struct tun_struct *tun = netdev_priv(dev);
> -       int txq = skb->queue_mapping;
> -       struct tun_file *tfile;
> -       u32 numqueues = 0;
> -
> -       rcu_read_lock();
> -       tfile = rcu_dereference(tun->tfiles[txq]);
> -       numqueues = READ_ONCE(tun->numqueues);
> -
> -       /* Drop packet if interface is not attached */
> -       if (txq >= numqueues)
> -               goto drop;
> -
>  #ifdef CONFIG_RPS
> -       if (numqueues == 1 && static_key_false(&rps_needed)) {
> +       if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
>                 /* Select queue was not called for the skbuff, so we extract the
>                  * RPS hash and save it into the flow_table here.
>                  */
> @@ -969,6 +986,26 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>                 }
>         }
>  #endif
> +}
> +
> +/* Net device start xmit */
> +static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> +       struct tun_struct *tun = netdev_priv(dev);
> +       int txq = skb->queue_mapping;
> +       struct tun_file *tfile;
> +       u32 numqueues = 0;
> +
> +       rcu_read_lock();
> +       tfile = rcu_dereference(tun->tfiles[txq]);
> +       numqueues = READ_ONCE(tun->numqueues);

Now tun->numqueues is read twice, reversing commit fa35864e0bb7
("tuntap: Fix for a race in accessing numqueues"). I don't see anything
left that would cause a divide by zero after the relevant code was
converted from divide to multiple and subsequently even removed.

But if it's safe to read multiple times, might as well remove the READ_ONCE.

> @@ -1551,7 +1588,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>         int copylen;
>         bool zerocopy = false;
>         int err;
> -       u32 rxhash;
> +       u32 rxhash = 0;
>         int skb_xdp = 1;
>         bool frags = tun_napi_frags_enabled(tun);
>
> @@ -1739,7 +1776,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>                 rcu_read_unlock();
>         }
>
> -       rxhash = __skb_get_hash_symmetric(skb);
> +       rcu_read_lock();
> +       if (!rcu_dereference(tun->steering_prog))
> +               rxhash = __skb_get_hash_symmetric(skb);
> +       rcu_read_unlock();
>
>         if (frags) {
>                 /* Exercise flow dissector code path. */
> @@ -1783,7 +1823,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>         u64_stats_update_end(&stats->syncp);
>         put_cpu_ptr(stats);
>
> -       tun_flow_update(tun, rxhash, tfile);
> +       if (rxhash)
> +               tun_flow_update(tun, rxhash, tfile);
> +

Nit: zero is a valid hash? In which case, an int64_t initialized to -1 is the
safer check.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ