Message-ID: <20231215171020.687342-17-bigeasy@linutronix.de>
Date: Fri, 15 Dec 2023 18:07:35 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org,
netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
Boqun Feng <boqun.feng@...il.com>,
Daniel Borkmann <daniel@...earbox.net>,
Eric Dumazet <edumazet@...gle.com>,
Frederic Weisbecker <frederic@...nel.org>,
Ingo Molnar <mingo@...hat.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Waiman Long <longman@...hat.com>,
Will Deacon <will@...nel.org>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
"K. Y. Srinivasan" <kys@...rosoft.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
Alexei Starovoitov <ast@...nel.org>,
Andrii Nakryiko <andrii@...nel.org>,
Dexuan Cui <decui@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>,
Hao Luo <haoluo@...gle.com>,
Jesper Dangaard Brouer <hawk@...nel.org>,
Jiri Olsa <jolsa@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Juergen Gross <jgross@...e.com>,
KP Singh <kpsingh@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Nikolay Aleksandrov <razor@...ckwall.org>,
Song Liu <song@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>,
Stefano Stabellini <sstabellini@...nel.org>,
Wei Liu <wei.liu@...nel.org>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Yonghong Song <yonghong.song@...ux.dev>,
bpf@...r.kernel.org,
virtualization@...ts.linux.dev,
xen-devel@...ts.xenproject.org
Subject: [PATCH net-next 16/24] net: netkit, veth, tun, virt*: Use nested-BH locking for XDP redirect.
The per-CPU variables used during bpf_prog_run_xdp() invocation and
later during xdp_do_redirect() rely on disabled BH for their protection.
Without the locking in local_bh_disable() on PREEMPT_RT these data
structures require explicit locking.

This is a follow-up to the previous change which introduced
bpf_run_lock.redirect_lock and now uses it within drivers.

The simple way is to acquire the lock before bpf_prog_run_xdp() is
invoked and hold it until the end of the function.
This does not always work because some drivers (cpsw, atlantic) invoke
xdp_do_flush() in the same context.
Acquiring the lock in bpf_prog_run_xdp() and dropping it in
xdp_do_redirect() (without touching drivers) does not work because not
all drivers which use bpf_prog_run_xdp() support XDP_REDIRECT (and
invoke xdp_do_redirect()).

Ideally the minimal locking scope would be bpf_prog_run_xdp() +
xdp_do_redirect() and everything else (error recovery, DMA unmapping,
free/alloc of memory, …) would happen outside of the locked section.
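
For illustration, a minimal sketch of this locking pattern in a
hypothetical driver RX helper; example_run_xdp() and its error handling
are not part of this patch, the hunks below adapt the pattern to each
driver's needs:

	static u32 example_run_xdp(struct bpf_prog *prog, struct xdp_buff *xdp,
				   struct net_device *dev)
	{
		u32 act;

		/* Protect the per-CPU redirect state on PREEMPT_RT. */
		scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
			act = bpf_prog_run_xdp(prog, xdp);
			if (act == XDP_REDIRECT && xdp_do_redirect(dev, xdp, prog))
				act = XDP_DROP;
		}
		/* DMA unmapping, freeing and error recovery happen outside the lock. */
		return act;
	}
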
Cc: "K. Y. Srinivasan" <kys@...rosoft.com>
Cc: "Michael S. Tsirkin" <mst@...hat.com>
Cc: Alexei Starovoitov <ast@...nel.org>
Cc: Andrii Nakryiko <andrii@...nel.org>
Cc: Dexuan Cui <decui@...rosoft.com>
Cc: Haiyang Zhang <haiyangz@...rosoft.com>
Cc: Hao Luo <haoluo@...gle.com>
Cc: Jesper Dangaard Brouer <hawk@...nel.org>
Cc: Jiri Olsa <jolsa@...nel.org>
Cc: John Fastabend <john.fastabend@...il.com>
Cc: Juergen Gross <jgross@...e.com>
Cc: KP Singh <kpsingh@...nel.org>
Cc: Martin KaFai Lau <martin.lau@...ux.dev>
Cc: Nikolay Aleksandrov <razor@...ckwall.org>
Cc: Song Liu <song@...nel.org>
Cc: Stanislav Fomichev <sdf@...gle.com>
Cc: Stefano Stabellini <sstabellini@...nel.org>
Cc: Wei Liu <wei.liu@...nel.org>
Cc: Willem de Bruijn <willemdebruijn.kernel@...il.com>
Cc: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
Cc: Yonghong Song <yonghong.song@...ux.dev>
Cc: bpf@...r.kernel.org
Cc: virtualization@...ts.linux.dev
Cc: xen-devel@...ts.xenproject.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
drivers/net/hyperv/netvsc_bpf.c | 1 +
drivers/net/netkit.c | 13 +++++++----
drivers/net/tun.c | 28 +++++++++++++----------
drivers/net/veth.c | 40 ++++++++++++++++++++-------------
drivers/net/virtio_net.c | 1 +
drivers/net/xen-netfront.c | 1 +
6 files changed, 52 insertions(+), 32 deletions(-)
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 4a9522689fa4f..55f8ca92ca199 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -58,6 +58,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
memcpy(xdp->data, data, len);
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 39171380ccf29..fbcf78477bda8 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -80,8 +80,15 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
skb->dev = peer;
entry = rcu_dereference(nk->active);
- if (entry)
- ret = netkit_run(entry, skb, ret);
+ if (entry) {
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ ret = netkit_run(entry, skb, ret);
+ if (ret == NETKIT_REDIRECT) {
+ dev_sw_netstats_tx_add(dev, 1, len);
+ skb_do_redirect(skb);
+ }
+ }
+ }
switch (ret) {
case NETKIT_NEXT:
case NETKIT_PASS:
@@ -95,8 +102,6 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
}
break;
case NETKIT_REDIRECT:
- dev_sw_netstats_tx_add(dev, 1, len);
- skb_do_redirect(skb);
break;
case NETKIT_DROP:
default:
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index afa5497f7c35c..fe0d31f11e4b6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1708,16 +1708,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
xdp_prepare_buff(&xdp, buf, pad, len, false);
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
- if (act == XDP_REDIRECT || act == XDP_TX) {
- get_page(alloc_frag->page);
- alloc_frag->offset += buflen;
- }
- err = tun_xdp_act(tun, xdp_prog, &xdp, act);
- if (err < 0) {
- if (act == XDP_REDIRECT || act == XDP_TX)
- put_page(alloc_frag->page);
- goto out;
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ if (act == XDP_REDIRECT || act == XDP_TX) {
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+ }
+ err = tun_xdp_act(tun, xdp_prog, &xdp, act);
+ if (err < 0) {
+ if (act == XDP_REDIRECT || act == XDP_TX)
+ put_page(alloc_frag->page);
+ goto out;
+ }
}
if (err == XDP_REDIRECT)
@@ -2460,8 +2462,10 @@ static int tun_xdp_one(struct tun_struct *tun,
xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
xdp_set_data_meta_invalid(xdp);
- act = bpf_prog_run_xdp(xdp_prog, xdp);
- ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+ }
if (ret < 0) {
put_page(virt_to_head_page(xdp->data));
return ret;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 977861c46b1fe..c69e5ff9f8795 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -624,7 +624,18 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
xdp->rxq = &rq->xdp_rxq;
vxbuf.skb = NULL;
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ if (act == XDP_REDIRECT) {
+ orig_frame = *frame;
+ xdp->rxq->mem = frame->mem;
+ if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
+ frame = &orig_frame;
stats->rx_drops++;
+ goto err_xdp;
+ }
+ }
+ }
switch (act) {
case XDP_PASS:
@@ -644,13 +655,6 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
- orig_frame = *frame;
- xdp->rxq->mem = frame->mem;
- if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
- frame = &orig_frame;
- stats->rx_drops++;
- goto err_xdp;
- }
stats->xdp_redirect++;
rcu_read_unlock();
goto xdp_xmit;
@@ -857,7 +861,18 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
orig_data = xdp->data;
orig_data_end = xdp->data_end;
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ if (act == XDP_REDIRECT) {
+ veth_xdp_get(xdp);
+ consume_skb(skb);
+ xdp->rxq->mem = rq->xdp_mem;
+ if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
+ stats->rx_drops++;
+ goto err_xdp;
+ }
+ }
+ }
switch (act) {
case XDP_PASS:
@@ -875,13 +890,6 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
- veth_xdp_get(xdp);
- consume_skb(skb);
- xdp->rxq->mem = rq->xdp_mem;
- if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
- stats->rx_drops++;
- goto err_xdp;
- }
stats->xdp_redirect++;
rcu_read_unlock();
goto xdp_xmit;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d16f592c2061f..5e362c4604239 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1010,6 +1010,7 @@ static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
int err;
u32 act;
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(xdp_prog, xdp);
u64_stats_inc(&stats->xdp_packets);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ad29f370034e4..e3daa8cdeb84e 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -978,6 +978,7 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM,
len, false);
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_TX:
--
2.43.0