Message-ID: <20250723145926.4120434-6-mohsin.bashr@gmail.com>
Date: Wed, 23 Jul 2025 07:59:22 -0700
From: Mohsin Bashir <mohsin.bashr@...il.com>
To: netdev@...r.kernel.org
Cc: kuba@...nel.org,
alexanderduyck@...com,
andrew+netdev@...n.ch,
davem@...emloft.net,
edumazet@...gle.com,
pabeni@...hat.com,
mohsin.bashr@...il.com,
horms@...nel.org,
vadim.fedorenko@...ux.dev,
jdamato@...tly.com,
sdf@...ichev.me,
aleksander.lobakin@...el.com,
ast@...nel.org,
daniel@...earbox.net,
hawk@...nel.org,
john.fastabend@...il.com
Subject: [PATCH net-next 5/9] eth: fbnic: Add XDP pass, drop, abort support

Add basic support for attaching an XDP program to the device and support
for PASS/DROP/ABORT actions.

In fbnic, buffers are always mapped as DMA_BIDIRECTIONAL.

Testing:

XDP_PASS:
Hook a simple XDP program that passes all the packets destined for a
specific port (a sketch of such a program is included after the XDP_DROP
output below):
iperf3 -c 192.168.1.10 -P 5 -p 12345
Connecting to host 192.168.1.10, port 12345
[ 5] local 192.168.1.9 port 46702 connected to 192.168.1.10 port 12345
[ ID] Interval Transfer Bitrate Retr Cwnd
- - - - - - - - - - - - - - - - - - - - - - - - -
[SUM] 1.00-2.00 sec 3.86 GBytes 33.2 Gbits/sec 0
XDP_DROP:
Hook an XDP program that drops packets destined for a specific port
iperf3 -c 192.168.1.10 -P 5 -p 12345
^C- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec 0 sender
[SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec receiver
iperf3: interrupt - the client has terminated
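
For reference, the xdp_pass_12345.o object used above is not included in
this patch. A minimal sketch of what such a single-buffer program might
look like follows; only the object name, section name, and port number
come from the log above, the rest is an assumption, and the XDP_DROP test
simply swaps the two return values:

/* Hypothetical reconstruction of xdp_pass_12345.o: pass TCP packets
 * destined for port 12345, drop other TCP traffic, and leave non-TCP
 * traffic alone. IP options are ignored for brevity.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int xdp_pass_12345(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct iphdr *iph = data + sizeof(*eth);
	struct tcphdr *tcp = data + sizeof(*eth) + sizeof(*iph);

	/* A single bounds check against data_end covers all three
	 * headers, which is what the verifier requires before any
	 * packet access.
	 */
	if ((void *)(tcp + 1) > data_end)
		return XDP_PASS;

	if (eth->h_proto != bpf_htons(ETH_P_IP) ||
	    iph->protocol != IPPROTO_TCP)
		return XDP_PASS;

	return tcp->dest == bpf_htons(12345) ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";
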
XDP with HDS:
- Validate that XDP attachment fails when the HDS threshold is too low
~] ethtool -G eth0 hds-thresh 512
~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
~] Error: fbnic: MTU too high, or HDS threshold is too low for single
buffer XDP.
- Validate successful XDP attachment when HDS threshold is appropriate
~] ethtool -G eth0 hds-thresh 1536
~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
- Validate that when an XDP program is attached, changing the HDS thresh to
a lower value fails
~] ethtool -G eth0 hds-thresh 512
~] netlink error: fbnic: Use higher HDS threshold or multi-buf capable
program
- Validate that the HDS thresh does not matter when XDP frags support is
available (a frags-capable program is sketched after this log)
~] ethtool -G eth0 hds-thresh 512
~] sudo ip link set eth0 xdpdrv obj xdp_pass_mb_12345.o sec xdp.frags
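
The acceptance rules above follow fbnic_check_split_frames() added below:
a single-buffer program is rejected when mtu + ETH_HLEN > hds_thresh.
Assuming the default MTU of 1500 (the log does not show it), a full frame
takes 1500 + 14 = 1514 bytes, so it would be split at a threshold of 512
but not at 1536, matching the failures and successes above. A program
loaded from the "xdp.frags" section is accepted at any threshold because
libbpf sets BPF_F_XDP_HAS_FRAGS for it, which is what
prog->aux->xdp_has_frags reflects. A minimal sketch of what
xdp_pass_mb_12345.o might contain (the source is an assumption):

/* Hypothetical reconstruction of xdp_pass_mb_12345.o. Loading from the
 * "xdp.frags" section makes libbpf set BPF_F_XDP_HAS_FRAGS, so
 * prog->aux->xdp_has_frags is true and the HDS threshold check is
 * skipped.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp.frags")
int xdp_pass_mb_12345(struct xdp_md *ctx)
{
	/* A real port filter on a multi-buffer frame would pull the
	 * headers with bpf_xdp_load_bytes(); omitted to keep the
	 * sketch minimal.
	 */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
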
Signed-off-by: Jakub Kicinski <kuba@...nel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@...il.com>
---
.../net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++
.../net/ethernet/meta/fbnic/fbnic_netdev.c | 35 +++++++
.../net/ethernet/meta/fbnic/fbnic_netdev.h | 5 +
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 95 +++++++++++++++++--
drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 1 +
5 files changed, 140 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 84a0db9f1be0..d7b9eb267ead 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -329,6 +329,17 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
return -EINVAL;
}
+ /* If an XDP program is attached, we should check for potential frame
+ * splitting. If the new HDS threshold can cause splitting, we should
+ * only allow it if the attached XDP program can handle frags.
+ */
+ if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
+ kernel_ring->hds_thresh)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Use higher HDS threshold or multi-buf capable program");
+ return -EINVAL;
+ }
+
if (!netif_running(netdev)) {
fbnic_set_rings(fbn, ring, kernel_ring);
return 0;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index d039e1c7a0d5..0621b89cbf3d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -504,6 +504,40 @@ static void fbnic_get_stats64(struct net_device *dev,
}
}
+bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
+ u32 hds_thresh)
+{
+ if (!prog)
+ return false;
+
+ if (prog->aux->xdp_has_frags)
+ return false;
+
+ return mtu + ETH_HLEN > hds_thresh;
+}
+
+static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct bpf_prog *prog = bpf->prog, *prev_prog;
+ struct fbnic_net *fbn = netdev_priv(netdev);
+
+ if (bpf->command != XDP_SETUP_PROG)
+ return -EINVAL;
+
+ if (fbnic_check_split_frames(prog, netdev->mtu,
+ fbn->hds_thresh)) {
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "MTU too high, or HDS threshold is too low for single buffer XDP");
+ return -EOPNOTSUPP;
+ }
+
+ prev_prog = xchg(&fbn->xdp_prog, prog);
+ if (prev_prog)
+ bpf_prog_put(prev_prog);
+
+ return 0;
+}
+
static const struct net_device_ops fbnic_netdev_ops = {
.ndo_open = fbnic_open,
.ndo_stop = fbnic_stop,
@@ -513,6 +547,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
.ndo_set_mac_address = fbnic_set_mac,
.ndo_set_rx_mode = fbnic_set_rx_mode,
.ndo_get_stats64 = fbnic_get_stats64,
+ .ndo_bpf = fbnic_bpf,
.ndo_hwtstamp_get = fbnic_hwtstamp_get,
.ndo_hwtstamp_set = fbnic_hwtstamp_set,
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index 04c5c7ed6c3a..bfa79ea910d8 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -18,6 +18,8 @@
#define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6
struct fbnic_net {
+ struct bpf_prog *xdp_prog;
+
struct fbnic_ring *tx[FBNIC_MAX_TXQS];
struct fbnic_ring *rx[FBNIC_MAX_RXQS];
@@ -104,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
int fbnic_phylink_get_fecparam(struct net_device *netdev,
struct ethtool_fecparam *fecparam);
int fbnic_phylink_init(struct net_device *netdev);
+
+bool fbnic_check_split_frames(struct bpf_prog *prog,
+ unsigned int mtu, u32 hds_threshold);
#endif /* _FBNIC_NETDEV_H_ */
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 71af7b9d5bcd..486c14e83ad5 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -2,17 +2,26 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
+#include <net/xdp.h>
#include "fbnic.h"
#include "fbnic_csr.h"
#include "fbnic_netdev.h"
#include "fbnic_txrx.h"
+enum {
+ FBNIC_XDP_PASS = 0,
+ FBNIC_XDP_CONSUME,
+ FBNIC_XDP_LEN_ERR,
+};
+
enum {
FBNIC_XMIT_CB_TS = 0x01,
};
@@ -877,7 +886,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
frame_sz = hdr_pg_end - hdr_pg_start;
- xdp_init_buff(&pkt->buff, frame_sz, NULL);
+ xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
FBNIC_BD_FRAG_SIZE;
@@ -966,6 +975,38 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
return skb;
}
+static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
+ struct fbnic_pkt_buff *pkt)
+{
+ struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
+ struct bpf_prog *xdp_prog;
+ int act;
+
+ xdp_prog = READ_ONCE(fbn->xdp_prog);
+ if (!xdp_prog)
+ goto xdp_pass;
+
+ if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
+ return ERR_PTR(-FBNIC_XDP_LEN_ERR);
+
+ act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
+ switch (act) {
+ case XDP_PASS:
+xdp_pass:
+ return fbnic_build_skb(nv, pkt);
+ default:
+ bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(nv->napi.dev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ break;
+ }
+
+ return ERR_PTR(-FBNIC_XDP_CONSUME);
+}
+
static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
{
return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
@@ -1064,7 +1105,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
if (unlikely(pkt->add_frag_failed))
skb = NULL;
else if (likely(!fbnic_rcd_metadata_err(rcd)))
- skb = fbnic_build_skb(nv, pkt);
+ skb = fbnic_run_xdp(nv, pkt);
/* Populate skb and invalidate XDP */
if (!IS_ERR_OR_NULL(skb)) {
@@ -1250,6 +1291,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
}
for (j = 0; j < nv->rxt_count; j++, i++) {
+ xdp_rxq_info_unreg(&nv->qt[i].xdp_rxq);
fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
@@ -1422,6 +1464,11 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
fbn->rx[rxq_idx] = &qt->cmpl;
+ err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, rxq_idx,
+ nv->napi.napi_id);
+ if (err)
+ goto free_ring_cur_qt;
+
/* Update Rx queue index */
rxt_count--;
rxq_idx += v_count;
@@ -1432,6 +1479,25 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
return 0;
+ while (rxt_count < nv->rxt_count) {
+ qt--;
+
+ xdp_rxq_info_unreg(&qt->xdp_rxq);
+free_ring_cur_qt:
+ fbnic_remove_rx_ring(fbn, &qt->sub0);
+ fbnic_remove_rx_ring(fbn, &qt->sub1);
+ fbnic_remove_rx_ring(fbn, &qt->cmpl);
+ rxt_count++;
+ }
+ while (txt_count < nv->txt_count) {
+ qt--;
+
+ fbnic_remove_tx_ring(fbn, &qt->sub0);
+ fbnic_remove_tx_ring(fbn, &qt->cmpl);
+
+ txt_count++;
+ }
+ fbnic_napi_free_irq(fbd, nv);
pp_destroy:
page_pool_destroy(nv->page_pool);
napi_del:
@@ -1708,8 +1774,10 @@ static void fbnic_free_nv_resources(struct fbnic_net *fbn,
for (i = 0; i < nv->txt_count; i++)
fbnic_free_qt_resources(fbn, &nv->qt[i]);
- for (j = 0; j < nv->rxt_count; j++, i++)
+ for (j = 0; j < nv->rxt_count; j++, i++) {
fbnic_free_qt_resources(fbn, &nv->qt[i]);
+ xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
+ }
}
static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
@@ -1721,19 +1789,32 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
for (i = 0; i < nv->txt_count; i++) {
err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
if (err)
- goto free_resources;
+ goto free_qt_resources;
}
/* Allocate Rx Resources */
for (j = 0; j < nv->rxt_count; j++, i++) {
+ /* Register XDP memory model for completion queue */
+ err = xdp_reg_mem_model(&nv->qt[i].xdp_rxq.mem,
+ MEM_TYPE_PAGE_POOL,
+ nv->page_pool);
+ if (err)
+ goto xdp_unreg_mem_model;
+
err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
if (err)
- goto free_resources;
+ goto xdp_unreg_cur_model;
}
return 0;
-free_resources:
+xdp_unreg_mem_model:
+ while (j-- && i--) {
+ fbnic_free_qt_resources(fbn, &nv->qt[i]);
+xdp_unreg_cur_model:
+ xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
+ }
+free_qt_resources:
while (i--)
fbnic_free_qt_resources(fbn, &nv->qt[i]);
return err;
@@ -2025,7 +2106,7 @@ void fbnic_flush(struct fbnic_net *fbn)
memset(qt->cmpl.desc, 0, qt->cmpl.size);
fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
- qt->cmpl.pkt->buff.data_hard_start = NULL;
+ memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
}
}
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index be34962c465e..0fefd1f00196 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -129,6 +129,7 @@ struct fbnic_ring {
struct fbnic_q_triad {
struct fbnic_ring sub0, sub1, cmpl;
+ struct xdp_rxq_info xdp_rxq;
};
struct fbnic_napi_vector {
--
2.47.1