[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aIEdS6fnblUEuYf5@boxer>
Date: Wed, 23 Jul 2025 19:35:07 +0200
From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To: Mohsin Bashir <mohsin.bashr@...il.com>
CC: <netdev@...r.kernel.org>, <kuba@...nel.org>, <alexanderduyck@...com>,
<andrew+netdev@...n.ch>, <davem@...emloft.net>, <edumazet@...gle.com>,
<pabeni@...hat.com>, <horms@...nel.org>, <vadim.fedorenko@...ux.dev>,
<jdamato@...tly.com>, <sdf@...ichev.me>, <aleksander.lobakin@...el.com>,
<ast@...nel.org>, <daniel@...earbox.net>, <hawk@...nel.org>,
<john.fastabend@...il.com>
Subject: Re: [PATCH net-next 5/9] eth: fbnic: Add XDP pass, drop, abort
support
On Wed, Jul 23, 2025 at 07:59:22AM -0700, Mohsin Bashir wrote:
> Add basic support for attaching an XDP program to the device and support
> for PASS/DROP/ABORT actions.
> In fbnic, buffers are always mapped as DMA_BIDIRECTIONAL.
>
> Testing:
>
> Hook a simple XDP program that passes all the packets destined for a
> specific port
>
> iperf3 -c 192.168.1.10 -P 5 -p 12345
> Connecting to host 192.168.1.10, port 12345
> [ 5] local 192.168.1.9 port 46702 connected to 192.168.1.10 port 12345
> [ ID] Interval Transfer Bitrate Retr Cwnd
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [SUM] 1.00-2.00 sec 3.86 GBytes 33.2 Gbits/sec 0
>
> XDP_DROP:
> Hook an XDP program that drops packets destined for a specific port
>
> iperf3 -c 192.168.1.10 -P 5 -p 12345
> ^C- - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bitrate Retr
> [SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec 0 sender
> [SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec receiver
> iperf3: interrupt - the client has terminated
>
> XDP with HDS:
>
> - Validate XDP attachment failure when HDS is low
> ~] ethtool -G eth0 hds-thresh 512
> ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
> ~] Error: fbnic: MTU too high, or HDS threshold is too low for single
> buffer XDP.
>
> - Validate successful XDP attachment when HDS threshold is appropriate
> ~] ethtool -G eth0 hds-thresh 1536
> ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
>
> - Validate when the XDP program is attached, changing HDS thresh to a
> lower value fails
> ~] ethtool -G eth0 hds-thresh 512
> ~] netlink error: fbnic: Use higher HDS threshold or multi-buf capable
> program
>
> - Validate HDS thresh does not matter when xdp frags support is
> available
> ~] ethtool -G eth0 hds-thresh 512
> ~] sudo ip link set eth0 xdpdrv obj xdp_pass_mb_12345.o sec xdp.frags
>
> Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> Signed-off-by: Mohsin Bashir <mohsin.bashr@...il.com>
> ---
> .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++
> .../net/ethernet/meta/fbnic/fbnic_netdev.c | 35 +++++++
> .../net/ethernet/meta/fbnic/fbnic_netdev.h | 5 +
> drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 95 +++++++++++++++++--
> drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 1 +
> 5 files changed, 140 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> index 84a0db9f1be0..d7b9eb267ead 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> @@ -329,6 +329,17 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
> return -EINVAL;
> }
>
> + /* If an XDP program is attached, we should check for potential frame
> + * splitting. If the new HDS threshold can cause splitting, we should
> + * only allow if the attached XDP program can handle frags.
> + */
> + if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
> + kernel_ring->hds_thresh)) {
> + NL_SET_ERR_MSG_MOD(extack,
> + "Use higher HDS threshold or multi-buf capable program");
> + return -EINVAL;
> + }
> +
> if (!netif_running(netdev)) {
> fbnic_set_rings(fbn, ring, kernel_ring);
> return 0;
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> index d039e1c7a0d5..0621b89cbf3d 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> @@ -504,6 +504,40 @@ static void fbnic_get_stats64(struct net_device *dev,
> }
> }
>
> +bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
> + u32 hds_thresh)
> +{
> + if (!prog)
> + return false;
> +
> + if (prog->aux->xdp_has_frags)
> + return false;
> +
> + return mtu + ETH_HLEN > hds_thresh;
> +}
> +
> +static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> + struct bpf_prog *prog = bpf->prog, *prev_prog;
> + struct fbnic_net *fbn = netdev_priv(netdev);
> +
> + if (bpf->command != XDP_SETUP_PROG)
> + return -EINVAL;
> +
> + if (fbnic_check_split_frames(prog, netdev->mtu,
> + fbn->hds_thresh)) {
> + NL_SET_ERR_MSG_MOD(bpf->extack,
> + "MTU too high, or HDS threshold is too low for single buffer XDP");
> + return -EOPNOTSUPP;
> + }
> +
> + prev_prog = xchg(&fbn->xdp_prog, prog);
> + if (prev_prog)
> + bpf_prog_put(prev_prog);
> +
> + return 0;
> +}
> +
> static const struct net_device_ops fbnic_netdev_ops = {
> .ndo_open = fbnic_open,
> .ndo_stop = fbnic_stop,
> @@ -513,6 +547,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
> .ndo_set_mac_address = fbnic_set_mac,
> .ndo_set_rx_mode = fbnic_set_rx_mode,
> .ndo_get_stats64 = fbnic_get_stats64,
> + .ndo_bpf = fbnic_bpf,
> .ndo_hwtstamp_get = fbnic_hwtstamp_get,
> .ndo_hwtstamp_set = fbnic_hwtstamp_set,
> };
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
> index 04c5c7ed6c3a..bfa79ea910d8 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
> @@ -18,6 +18,8 @@
> #define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6
>
> struct fbnic_net {
> + struct bpf_prog *xdp_prog;
> +
> struct fbnic_ring *tx[FBNIC_MAX_TXQS];
> struct fbnic_ring *rx[FBNIC_MAX_RXQS];
>
> @@ -104,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
> int fbnic_phylink_get_fecparam(struct net_device *netdev,
> struct ethtool_fecparam *fecparam);
> int fbnic_phylink_init(struct net_device *netdev);
> +
> +bool fbnic_check_split_frames(struct bpf_prog *prog,
> + unsigned int mtu, u32 hds_threshold);
> #endif /* _FBNIC_NETDEV_H_ */
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> index 71af7b9d5bcd..486c14e83ad5 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> @@ -2,17 +2,26 @@
> /* Copyright (c) Meta Platforms, Inc. and affiliates. */
>
> #include <linux/bitfield.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> #include <linux/iopoll.h>
> #include <linux/pci.h>
> #include <net/netdev_queues.h>
> #include <net/page_pool/helpers.h>
> #include <net/tcp.h>
> +#include <net/xdp.h>
>
> #include "fbnic.h"
> #include "fbnic_csr.h"
> #include "fbnic_netdev.h"
> #include "fbnic_txrx.h"
>
> +enum {
> + FBNIC_XDP_PASS = 0,
> + FBNIC_XDP_CONSUME,
> + FBNIC_XDP_LEN_ERR,
> +};
> +
> enum {
> FBNIC_XMIT_CB_TS = 0x01,
> };
> @@ -877,7 +886,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
>
> headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
> frame_sz = hdr_pg_end - hdr_pg_start;
> - xdp_init_buff(&pkt->buff, frame_sz, NULL);
> + xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
> hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
> FBNIC_BD_FRAG_SIZE;
>
> @@ -966,6 +975,38 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
> return skb;
> }
>
> +static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
> + struct fbnic_pkt_buff *pkt)
> +{
> + struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
> + struct bpf_prog *xdp_prog;
> + int act;
> +
> + xdp_prog = READ_ONCE(fbn->xdp_prog);
> + if (!xdp_prog)
> + goto xdp_pass;
Hi Mohsin,
I thought we were past the times when we read prog pointer per each
processed packet and agreed on reading the pointer once per napi loop?
> +
> + if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
> + return ERR_PTR(-FBNIC_XDP_LEN_ERR);
when can this happen and couldn't you catch this within ndo_bpf? i suppose
it's related to hds setup.
> +
> + act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
> + switch (act) {
> + case XDP_PASS:
> +xdp_pass:
> + return fbnic_build_skb(nv, pkt);
> + default:
> + bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
> + fallthrough;
> + case XDP_ABORTED:
> + trace_xdp_exception(nv->napi.dev, xdp_prog, act);
> + fallthrough;
> + case XDP_DROP:
> + break;
> + }
> +
> + return ERR_PTR(-FBNIC_XDP_CONSUME);
> +}
> +
> static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
> {
> return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
> @@ -1064,7 +1105,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
> if (unlikely(pkt->add_frag_failed))
> skb = NULL;
> else if (likely(!fbnic_rcd_metadata_err(rcd)))
> - skb = fbnic_build_skb(nv, pkt);
> + skb = fbnic_run_xdp(nv, pkt);
>
> /* Populate skb and invalidate XDP */
> if (!IS_ERR_OR_NULL(skb)) {
> @@ -1250,6 +1291,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
> }
>
> for (j = 0; j < nv->rxt_count; j++, i++) {
> + xdp_rxq_info_unreg(&nv->qt[i].xdp_rxq);
> fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
> fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
> fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
> @@ -1422,6 +1464,11 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
> fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
> fbn->rx[rxq_idx] = &qt->cmpl;
>
> + err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, rxq_idx,
> + nv->napi.napi_id);
> + if (err)
> + goto free_ring_cur_qt;
> +
> /* Update Rx queue index */
> rxt_count--;
> rxq_idx += v_count;
> @@ -1432,6 +1479,25 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
>
> return 0;
>
> + while (rxt_count < nv->rxt_count) {
> + qt--;
> +
> + xdp_rxq_info_unreg(&qt->xdp_rxq);
> +free_ring_cur_qt:
> + fbnic_remove_rx_ring(fbn, &qt->sub0);
> + fbnic_remove_rx_ring(fbn, &qt->sub1);
> + fbnic_remove_rx_ring(fbn, &qt->cmpl);
> + rxt_count++;
> + }
> + while (txt_count < nv->txt_count) {
> + qt--;
> +
> + fbnic_remove_tx_ring(fbn, &qt->sub0);
> + fbnic_remove_tx_ring(fbn, &qt->cmpl);
> +
> + txt_count++;
> + }
> + fbnic_napi_free_irq(fbd, nv);
> pp_destroy:
> page_pool_destroy(nv->page_pool);
> napi_del:
> @@ -1708,8 +1774,10 @@ static void fbnic_free_nv_resources(struct fbnic_net *fbn,
> for (i = 0; i < nv->txt_count; i++)
> fbnic_free_qt_resources(fbn, &nv->qt[i]);
>
> - for (j = 0; j < nv->rxt_count; j++, i++)
> + for (j = 0; j < nv->rxt_count; j++, i++) {
> fbnic_free_qt_resources(fbn, &nv->qt[i]);
> + xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
> + }
> }
>
> static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
> @@ -1721,19 +1789,32 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
> for (i = 0; i < nv->txt_count; i++) {
> err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
> if (err)
> - goto free_resources;
> + goto free_qt_resources;
> }
>
> /* Allocate Rx Resources */
> for (j = 0; j < nv->rxt_count; j++, i++) {
> + /* Register XDP memory model for completion queue */
> + err = xdp_reg_mem_model(&nv->qt[i].xdp_rxq.mem,
> + MEM_TYPE_PAGE_POOL,
> + nv->page_pool);
> + if (err)
> + goto xdp_unreg_mem_model;
> +
> err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
> if (err)
> - goto free_resources;
> + goto xdp_unreg_cur_model;
> }
>
> return 0;
>
> -free_resources:
> +xdp_unreg_mem_model:
> + while (j-- && i--) {
> + fbnic_free_qt_resources(fbn, &nv->qt[i]);
> +xdp_unreg_cur_model:
> + xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
> + }
> +free_qt_resources:
> while (i--)
> fbnic_free_qt_resources(fbn, &nv->qt[i]);
> return err;
> @@ -2025,7 +2106,7 @@ void fbnic_flush(struct fbnic_net *fbn)
> memset(qt->cmpl.desc, 0, qt->cmpl.size);
>
> fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
> - qt->cmpl.pkt->buff.data_hard_start = NULL;
> + memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
> }
> }
> }
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
> index be34962c465e..0fefd1f00196 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
> @@ -129,6 +129,7 @@ struct fbnic_ring {
>
> struct fbnic_q_triad {
> struct fbnic_ring sub0, sub1, cmpl;
> + struct xdp_rxq_info xdp_rxq;
> };
>
> struct fbnic_napi_vector {
> --
> 2.47.1
>
>
Powered by blists - more mailing lists