lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <169272715407.1975370.3989385869434330916.stgit@firesoul>
Date: Tue, 22 Aug 2023 19:59:14 +0200
From: Jesper Dangaard Brouer <hawk@...nel.org>
To: netdev@...r.kernel.org, edumazet@...gle.com
Cc: Jesper Dangaard Brouer <hawk@...nel.org>, pabeni@...hat.com,
 kuba@...nel.org, davem@...emloft.net, lorenzo@...nel.org,
 Ilias Apalodimas <ilias.apalodimas@...aro.org>, mtahhan@...hat.com,
 huangjie.albert@...edance.com, Yunsheng Lin <linyunsheng@...wei.com>,
 Liang Chen <liangchen.linux@...il.com>
Subject: [PATCH net-next RFC v1 2/4] veth: use generic-XDP functions when
 dealing with SKBs

The root-cause the realloc issue is that veth_xdp_rcv_skb() code path (that
handles SKBs like generic-XDP) is calling a native-XDP function
xdp_do_redirect(), instead of simply using xdp_do_generic_redirect() that can
handle SKBs.

The existing code tries to steal the packet-data from the SKB (and frees the SKB
itself). This cause issues as SKBs can have different memory models that are
incompatible with native-XDP call xdp_do_redirect(). For this reason the checks
in veth_convert_skb_to_xdp_buff() becomes more strict. This in turn makes this a
bad approach. Simply leveraging generic-XDP helpers e.g. generic_xdp_tx() and
xdp_do_generic_redirect() as this resolves the issue given netstack can handle
these different SKB memory models.

Signed-off-by: Jesper Dangaard Brouer <hawk@...nel.org>
---
 drivers/net/veth.c |   69 ++++++++++++++++++++--------------------------------
 net/core/dev.c     |    1 +
 net/core/filter.c  |    1 +
 3 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index be7b62f57087..192547035194 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -713,19 +713,6 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static void veth_xdp_get(struct xdp_buff *xdp)
-{
-	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-	int i;
-
-	get_page(virt_to_page(xdp->data));
-	if (likely(!xdp_buff_has_frags(xdp)))
-		return;
-
-	for (i = 0; i < sinfo->nr_frags; i++)
-		__skb_frag_ref(&sinfo->frags[i]);
-}
-
 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 					struct xdp_buff *xdp,
 					struct sk_buff **pskb)
@@ -837,7 +824,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	struct veth_xdp_buff vxbuf;
 	struct xdp_buff *xdp = &vxbuf.xdp;
 	u32 act, metalen;
-	int off;
+	int off, err;
 
 	skb_prepare_for_gro(skb);
 
@@ -860,30 +847,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 
 	switch (act) {
 	case XDP_PASS:
-		break;
 	case XDP_TX:
-		veth_xdp_get(xdp);
-		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
-		if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
-			trace_xdp_exception(rq->dev, xdp_prog, act);
-			stats->rx_drops++;
-			goto err_xdp;
-		}
-		stats->xdp_tx++;
-		rcu_read_unlock();
-		goto xdp_xmit;
 	case XDP_REDIRECT:
-		veth_xdp_get(xdp);
-		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
-		if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
-			stats->rx_drops++;
-			goto err_xdp;
-		}
-		stats->xdp_redirect++;
-		rcu_read_unlock();
-		goto xdp_xmit;
+		/* Postpone actions to after potential SKB geometry update */
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
 		fallthrough;
@@ -894,7 +861,6 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		stats->xdp_drops++;
 		goto xdp_drop;
 	}
-	rcu_read_unlock();
 
 	/* check if bpf_xdp_adjust_head was used */
 	off = xdp->data - orig_data;
@@ -919,11 +885,32 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	else
 		skb->data_len = 0;
 
-	skb->protocol = eth_type_trans(skb, rq->dev);
-
 	metalen = xdp->data - xdp->data_meta;
 	if (metalen)
 		skb_metadata_set(skb, metalen);
+
+	switch (act) {
+	case XDP_PASS:
+		/* This skb_pull's off mac_len, __skb_push'ed above */
+		skb->protocol = eth_type_trans(skb, rq->dev);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_generic_redirect(rq->dev, skb, xdp, xdp_prog);
+		if (unlikely(err)) {
+			trace_xdp_exception(rq->dev, xdp_prog, act);
+			goto xdp_drop;
+		}
+		stats->xdp_redirect++;
+		rcu_read_unlock();
+		goto xdp_xmit;
+	case XDP_TX:
+		/* TODO: this can be optimized to be veth specific */
+		generic_xdp_tx(skb, xdp_prog);
+		stats->xdp_tx++;
+		rcu_read_unlock();
+		goto xdp_xmit;
+	}
+	rcu_read_unlock();
 out:
 	return skb;
 drop:
@@ -931,10 +918,6 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 xdp_drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
-	return NULL;
-err_xdp:
-	rcu_read_unlock();
-	xdp_return_buff(xdp);
 xdp_xmit:
 	return NULL;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 17e6281e408c..1187bfced9ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4987,6 +4987,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 		kfree_skb(skb);
 	}
 }
+EXPORT_SYMBOL_GPL(generic_xdp_tx);
 
 static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index a094694899c9..a6fd7ba901ba 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4443,6 +4443,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
 	return err;
 }
+EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
 
 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 {



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ