Message-ID: <d53e8ed1-33a7-a29a-c53a-0be9e1a97901@redhat.com>
Date: Tue, 5 Dec 2017 15:29:52 +0800
From: Jason Wang <jasowang@...hat.com>
To: Willem de Bruijn <willemdebruijn.kernel@...il.com>
Cc: Network Development <netdev@...r.kernel.org>,
 LKML <linux-kernel@...r.kernel.org>,
 "Michael S. Tsirkin" <mst@...hat.com>,
 Tom Herbert <tom@...bertland.com>,
 aconole@...hat.com, wexu@...hat.com
Subject: Re: [PATCH net-next V3] tun: add eBPF based queue selection method

On 2017-12-05 08:16, Willem de Bruijn wrote:
> On Mon, Dec 4, 2017 at 4:31 AM, Jason Wang <jasowang@...hat.com> wrote:
>> This patch introduces an eBPF based queue selection method. With this,
>> the policy could be offloaded to userspace completely through a new
>> ioctl TUNSETSTEERINGEBPF.
>>
>> Signed-off-by: Jason Wang <jasowang@...hat.com>
>> ---
>> +static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>> +{
>> +        struct tun_steering_prog *prog;
>> +        u16 ret = 0;
>> +
>> +        prog = rcu_dereference(tun->steering_prog);
>> +        if (prog)
>> +                ret = bpf_prog_run_clear_cb(prog->prog, skb);
> This dereferences tun->steering_prog for a second time. It is safe
> in this load balancing case to assign a few extra packets to queue 0.
> But the issue can also be avoided by replacing the function with a
> direct call in tun_net_xmit:
>
>     struct tun_steering_prog *s = rcu_dereference(tun->steering_prog);
>     if (s)
>         ret = bpf_prog_run_clear_cb(s->prog, skb) % tun->numqueues;

Right.

>
>>  /* Net device start xmit */
>> -static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>> +static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>>  {
>> -        struct tun_struct *tun = netdev_priv(dev);
>> -        int txq = skb->queue_mapping;
>> -        struct tun_file *tfile;
>> -        u32 numqueues = 0;
>> -
>> -        rcu_read_lock();
>> -        tfile = rcu_dereference(tun->tfiles[txq]);
>> -        numqueues = READ_ONCE(tun->numqueues);
>> -
>> -        /* Drop packet if interface is not attached */
>> -        if (txq >= numqueues)
>> -                goto drop;
>> -
>>  #ifdef CONFIG_RPS
>> -        if (numqueues == 1 && static_key_false(&rps_needed)) {
>> +        if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
>>                 /* Select queue was not called for the skbuff, so we extract the
>>                  * RPS hash and save it into the flow_table here.
>>                  */
>> @@ -969,6 +986,26 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>                 }
>>         }
>>  #endif
>> +}
>> +
>> +/* Net device start xmit */
>> +static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>> +{
>> +        struct tun_struct *tun = netdev_priv(dev);
>> +        int txq = skb->queue_mapping;
>> +        struct tun_file *tfile;
>> +        u32 numqueues = 0;
>> +
>> +        rcu_read_lock();
>> +        tfile = rcu_dereference(tun->tfiles[txq]);
>> +        numqueues = READ_ONCE(tun->numqueues);
> Now tun->numqueues is read twice, reversing commit fa35864e0bb7
> ("tuntap: Fix for a race in accessing numqueues"). I don't see anything
> left that would cause a divide by zero after the relevant code was
> converted from divide to multiply and subsequently even removed.
>
> But if it's safe to read multiple times, might as well remove the READ_ONCE.

Good point, but READ_ONCE() is not something new, so we'd better change this
in another patch.
>
>> @@ -1551,7 +1588,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>         int copylen;
>>         bool zerocopy = false;
>>         int err;
>> -        u32 rxhash;
>> +        u32 rxhash = 0;
>>         int skb_xdp = 1;
>>         bool frags = tun_napi_frags_enabled(tun);
>>
>> @@ -1739,7 +1776,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>                 rcu_read_unlock();
>>         }
>>
>> -        rxhash = __skb_get_hash_symmetric(skb);
>> +        rcu_read_lock();
>> +        if (!rcu_dereference(tun->steering_prog))
>> +                rxhash = __skb_get_hash_symmetric(skb);
>> +        rcu_read_unlock();
>>
>>         if (frags) {
>>                 /* Exercise flow dissector code path. */
>> @@ -1783,7 +1823,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>>                 u64_stats_update_end(&stats->syncp);
>>                 put_cpu_ptr(stats);
>>
>> -        tun_flow_update(tun, rxhash, tfile);
>> +        if (rxhash)
>> +                tun_flow_update(tun, rxhash, tfile);
>> +
> Nit: zero is a valid hash? In which case, an int64_t initialized to -1 is the
> safer check.

Looks like it isn't? E.g., looking at __flow_hash_from_keys(), it does:

static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
        u32 hash;

        __flow_hash_consistentify(keys);

        hash = __flow_hash_words(flow_keys_hash_start(keys),
                                 flow_keys_hash_length(keys), keyval);
        if (!hash)
                hash = 1;

        return hash;
}

Thanks
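
As an aside on how the interface under review is meant to be used: below is a
minimal, hypothetical sketch of the kind of steering program the new
TUNSETSTEERINGEBPF ioctl would accept. The BPF program type, the SEC()
annotation, and the toy length-parity policy are assumptions made for this
illustration; they are not taken from the patch.

/* Toy steering policy: the driver runs the program per packet and uses the
 * return value (modulo the number of queues) as the transmit queue index.
 * Build with something like: clang -O2 -target bpf -c tun_steer.c -o tun_steer.o
 */
#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

SEC("socket")
int steer_by_len(struct __sk_buff *skb)
{
        /* Spread packets across two queues by packet length parity. */
        return skb->len & 1;
}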
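
And a hypothetical userspace sketch of attaching such a program to a
multi-queue tap device. The TUNSETSTEERINGEBPF request number and the
pointer-to-prog-fd argument convention are assumptions here; check the uapi
header shipped with the final version of the patch before relying on them.

/* Open one queue of a multi-queue tap device and attach a previously
 * loaded eBPF steering program (prog_fd) via TUNSETSTEERINGEBPF.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

#ifndef TUNSETSTEERINGEBPF
#define TUNSETSTEERINGEBPF _IOR('T', 224, int)  /* assumed value */
#endif

int attach_steering_prog(const char *ifname, int prog_fd)
{
        struct ifreq ifr;
        int fd = open("/dev/net/tun", O_RDWR);

        if (fd < 0)
                return -1;

        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

        if (ioctl(fd, TUNSETIFF, &ifr) < 0 ||
            ioctl(fd, TUNSETSTEERINGEBPF, &prog_fd) < 0) {
                close(fd);
                return -1;
        }

        return fd;  /* queue fd; tx queue selection now follows prog_fd */
}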