lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aIEdS6fnblUEuYf5@boxer>
Date: Wed, 23 Jul 2025 19:35:07 +0200
From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To: Mohsin Bashir <mohsin.bashr@...il.com>
CC: <netdev@...r.kernel.org>, <kuba@...nel.org>, <alexanderduyck@...com>,
	<andrew+netdev@...n.ch>, <davem@...emloft.net>, <edumazet@...gle.com>,
	<pabeni@...hat.com>, <horms@...nel.org>, <vadim.fedorenko@...ux.dev>,
	<jdamato@...tly.com>, <sdf@...ichev.me>, <aleksander.lobakin@...el.com>,
	<ast@...nel.org>, <daniel@...earbox.net>, <hawk@...nel.org>,
	<john.fastabend@...il.com>
Subject: Re: [PATCH net-next 5/9] eth: fbnic: Add XDP pass, drop, abort
 support

On Wed, Jul 23, 2025 at 07:59:22AM -0700, Mohsin Bashir wrote:
> Add basic support for attaching an XDP program to the device and support
> for PASS/DROP/ABORT actions.
> In fbnic, buffers are always mapped as DMA_BIDIRECTIONAL.
> 
> Testing:
> 
> Hook a simple XDP program that passes all the packets destined for a
> specific port
> 
> iperf3 -c 192.168.1.10 -P 5 -p 12345
> Connecting to host 192.168.1.10, port 12345
> [  5] local 192.168.1.9 port 46702 connected to 192.168.1.10 port 12345
> [ ID] Interval           Transfer     Bitrate         Retr  Cwnd
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [SUM]   1.00-2.00   sec  3.86 GBytes  33.2 Gbits/sec    0
> 
> XDP_DROP:
> Hook an XDP program that drops packets destined for a specific port
> 
>  iperf3 -c 192.168.1.10 -P 5 -p 12345
> ^C- - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bitrate         Retr
> [SUM]   0.00-0.00   sec  0.00 Bytes  0.00 bits/sec    0       sender
> [SUM]   0.00-0.00   sec  0.00 Bytes  0.00 bits/sec            receiver
> iperf3: interrupt - the client has terminated
> 
> XDP with HDS:
> 
> - Validate XDP attachment failure when HDS is low
>    ~] ethtool -G eth0 hds-thresh 512
>    ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
>    ~] Error: fbnic: MTU too high, or HDS threshold is too low for single
>       buffer XDP.
> 
> - Validate successful XDP attachment when HDS threshold is appropriate
>   ~] ethtool -G eth0 hds-thresh 1536
>   ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
> 
> - Validate when the XDP program is attached, changing HDS thresh to a
>   lower value fails
>   ~] ethtool -G eth0 hds-thresh 512
>   ~] netlink error: fbnic: Use higher HDS threshold or multi-buf capable
>      program
> 
> - Validate HDS thresh does not matter when xdp frags support is
>   available
>   ~] ethtool -G eth0 hds-thresh 512
>   ~] sudo ip link set eth0 xdpdrv obj xdp_pass_mb_12345.o sec xdp.frags
> 
> Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> Signed-off-by: Mohsin Bashir <mohsin.bashr@...il.com>
> ---
>  .../net/ethernet/meta/fbnic/fbnic_ethtool.c   | 11 +++
>  .../net/ethernet/meta/fbnic/fbnic_netdev.c    | 35 +++++++
>  .../net/ethernet/meta/fbnic/fbnic_netdev.h    |  5 +
>  drivers/net/ethernet/meta/fbnic/fbnic_txrx.c  | 95 +++++++++++++++++--
>  drivers/net/ethernet/meta/fbnic/fbnic_txrx.h  |  1 +
>  5 files changed, 140 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> index 84a0db9f1be0..d7b9eb267ead 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> @@ -329,6 +329,17 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
>  		return -EINVAL;
>  	}
>  
> +	/* If an XDP program is attached, we should check for potential frame
> +	 * splitting. If the new HDS threshold can cause splitting, we should
> +	 * only allow if the attached XDP program can handle frags.
> +	 */
> +	if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
> +				     kernel_ring->hds_thresh)) {
> +		NL_SET_ERR_MSG_MOD(extack,
> +				   "Use higher HDS threshold or multi-buf capable program");
> +		return -EINVAL;
> +	}
> +
>  	if (!netif_running(netdev)) {
>  		fbnic_set_rings(fbn, ring, kernel_ring);
>  		return 0;
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> index d039e1c7a0d5..0621b89cbf3d 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> @@ -504,6 +504,40 @@ static void fbnic_get_stats64(struct net_device *dev,
>  	}
>  }
>  
> +bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
> +			      u32 hds_thresh)
> +{
> +	if (!prog)
> +		return false;
> +
> +	if (prog->aux->xdp_has_frags)
> +		return false;
> +
> +	return mtu + ETH_HLEN > hds_thresh;
> +}
> +
> +static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> +	struct bpf_prog *prog = bpf->prog, *prev_prog;
> +	struct fbnic_net *fbn = netdev_priv(netdev);
> +
> +	if (bpf->command != XDP_SETUP_PROG)
> +		return -EINVAL;
> +
> +	if (fbnic_check_split_frames(prog, netdev->mtu,
> +				     fbn->hds_thresh)) {
> +		NL_SET_ERR_MSG_MOD(bpf->extack,
> +				   "MTU too high, or HDS threshold is too low for single buffer XDP");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	prev_prog = xchg(&fbn->xdp_prog, prog);
> +	if (prev_prog)
> +		bpf_prog_put(prev_prog);
> +
> +	return 0;
> +}
> +
>  static const struct net_device_ops fbnic_netdev_ops = {
>  	.ndo_open		= fbnic_open,
>  	.ndo_stop		= fbnic_stop,
> @@ -513,6 +547,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
>  	.ndo_set_mac_address	= fbnic_set_mac,
>  	.ndo_set_rx_mode	= fbnic_set_rx_mode,
>  	.ndo_get_stats64	= fbnic_get_stats64,
> +	.ndo_bpf		= fbnic_bpf,
>  	.ndo_hwtstamp_get	= fbnic_hwtstamp_get,
>  	.ndo_hwtstamp_set	= fbnic_hwtstamp_set,
>  };
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
> index 04c5c7ed6c3a..bfa79ea910d8 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
> @@ -18,6 +18,8 @@
>  #define FBNIC_TUN_GSO_FEATURES		NETIF_F_GSO_IPXIP6
>  
>  struct fbnic_net {
> +	struct bpf_prog *xdp_prog;
> +
>  	struct fbnic_ring *tx[FBNIC_MAX_TXQS];
>  	struct fbnic_ring *rx[FBNIC_MAX_RXQS];
>  
> @@ -104,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
>  int fbnic_phylink_get_fecparam(struct net_device *netdev,
>  			       struct ethtool_fecparam *fecparam);
>  int fbnic_phylink_init(struct net_device *netdev);
> +
> +bool fbnic_check_split_frames(struct bpf_prog *prog,
> +			      unsigned int mtu, u32 hds_threshold);
>  #endif /* _FBNIC_NETDEV_H_ */
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> index 71af7b9d5bcd..486c14e83ad5 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> @@ -2,17 +2,26 @@
>  /* Copyright (c) Meta Platforms, Inc. and affiliates. */
>  
>  #include <linux/bitfield.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>  #include <linux/iopoll.h>
>  #include <linux/pci.h>
>  #include <net/netdev_queues.h>
>  #include <net/page_pool/helpers.h>
>  #include <net/tcp.h>
> +#include <net/xdp.h>
>  
>  #include "fbnic.h"
>  #include "fbnic_csr.h"
>  #include "fbnic_netdev.h"
>  #include "fbnic_txrx.h"
>  
> +enum {
> +	FBNIC_XDP_PASS = 0,
> +	FBNIC_XDP_CONSUME,
> +	FBNIC_XDP_LEN_ERR,
> +};
> +
>  enum {
>  	FBNIC_XMIT_CB_TS	= 0x01,
>  };
> @@ -877,7 +886,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
>  
>  	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
>  	frame_sz = hdr_pg_end - hdr_pg_start;
> -	xdp_init_buff(&pkt->buff, frame_sz, NULL);
> +	xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
>  	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
>  			FBNIC_BD_FRAG_SIZE;
>  
> @@ -966,6 +975,38 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
>  	return skb;
>  }
>  
> +static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
> +				     struct fbnic_pkt_buff *pkt)
> +{
> +	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
> +	struct bpf_prog *xdp_prog;
> +	int act;
> +
> +	xdp_prog = READ_ONCE(fbn->xdp_prog);
> +	if (!xdp_prog)
> +		goto xdp_pass;

Hi Mohsin,

I thought we were past the times when we read prog pointer per each
processed packet and agreed on reading the pointer once per napi loop?

> +
> +	if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
> +		return ERR_PTR(-FBNIC_XDP_LEN_ERR);

when can this happen and couldn't you catch this within ndo_bpf? i suppose
it's related to hds setup.

> +
> +	act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
> +	switch (act) {
> +	case XDP_PASS:
> +xdp_pass:
> +		return fbnic_build_skb(nv, pkt);
> +	default:
> +		bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +		trace_xdp_exception(nv->napi.dev, xdp_prog, act);
> +		fallthrough;
> +	case XDP_DROP:
> +		break;
> +	}
> +
> +	return ERR_PTR(-FBNIC_XDP_CONSUME);
> +}
> +
>  static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
>  {
>  	return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
> @@ -1064,7 +1105,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
>  			if (unlikely(pkt->add_frag_failed))
>  				skb = NULL;
>  			else if (likely(!fbnic_rcd_metadata_err(rcd)))
> -				skb = fbnic_build_skb(nv, pkt);
> +				skb = fbnic_run_xdp(nv, pkt);
>  
>  			/* Populate skb and invalidate XDP */
>  			if (!IS_ERR_OR_NULL(skb)) {
> @@ -1250,6 +1291,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
>  	}
>  
>  	for (j = 0; j < nv->rxt_count; j++, i++) {
> +		xdp_rxq_info_unreg(&nv->qt[i].xdp_rxq);
>  		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
>  		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
>  		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
> @@ -1422,6 +1464,11 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
>  		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
>  		fbn->rx[rxq_idx] = &qt->cmpl;
>  
> +		err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, rxq_idx,
> +				       nv->napi.napi_id);
> +		if (err)
> +			goto free_ring_cur_qt;
> +
>  		/* Update Rx queue index */
>  		rxt_count--;
>  		rxq_idx += v_count;
> @@ -1432,6 +1479,25 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
>  
>  	return 0;
>  
> +	while (rxt_count < nv->rxt_count) {
> +		qt--;
> +
> +		xdp_rxq_info_unreg(&qt->xdp_rxq);
> +free_ring_cur_qt:
> +		fbnic_remove_rx_ring(fbn, &qt->sub0);
> +		fbnic_remove_rx_ring(fbn, &qt->sub1);
> +		fbnic_remove_rx_ring(fbn, &qt->cmpl);
> +		rxt_count++;
> +	}
> +	while (txt_count < nv->txt_count) {
> +		qt--;
> +
> +		fbnic_remove_tx_ring(fbn, &qt->sub0);
> +		fbnic_remove_tx_ring(fbn, &qt->cmpl);
> +
> +		txt_count++;
> +	}
> +	fbnic_napi_free_irq(fbd, nv);
>  pp_destroy:
>  	page_pool_destroy(nv->page_pool);
>  napi_del:
> @@ -1708,8 +1774,10 @@ static void fbnic_free_nv_resources(struct fbnic_net *fbn,
>  	for (i = 0; i < nv->txt_count; i++)
>  		fbnic_free_qt_resources(fbn, &nv->qt[i]);
>  
> -	for (j = 0; j < nv->rxt_count; j++, i++)
> +	for (j = 0; j < nv->rxt_count; j++, i++) {
>  		fbnic_free_qt_resources(fbn, &nv->qt[i]);
> +		xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
> +	}
>  }
>  
>  static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
> @@ -1721,19 +1789,32 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
>  	for (i = 0; i < nv->txt_count; i++) {
>  		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
>  		if (err)
> -			goto free_resources;
> +			goto free_qt_resources;
>  	}
>  
>  	/* Allocate Rx Resources */
>  	for (j = 0; j < nv->rxt_count; j++, i++) {
> +		/* Register XDP memory model for completion queue */
> +		err = xdp_reg_mem_model(&nv->qt[i].xdp_rxq.mem,
> +					MEM_TYPE_PAGE_POOL,
> +					nv->page_pool);
> +		if (err)
> +			goto xdp_unreg_mem_model;
> +
>  		err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
>  		if (err)
> -			goto free_resources;
> +			goto xdp_unreg_cur_model;
>  	}
>  
>  	return 0;
>  
> -free_resources:
> +xdp_unreg_mem_model:
> +	while (j-- && i--) {
> +		fbnic_free_qt_resources(fbn, &nv->qt[i]);
> +xdp_unreg_cur_model:
> +		xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
> +	}
> +free_qt_resources:
>  	while (i--)
>  		fbnic_free_qt_resources(fbn, &nv->qt[i]);
>  	return err;
> @@ -2025,7 +2106,7 @@ void fbnic_flush(struct fbnic_net *fbn)
>  			memset(qt->cmpl.desc, 0, qt->cmpl.size);
>  
>  			fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
> -			qt->cmpl.pkt->buff.data_hard_start = NULL;
> +			memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
>  		}
>  	}
>  }
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
> index be34962c465e..0fefd1f00196 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
> @@ -129,6 +129,7 @@ struct fbnic_ring {
>  
>  struct fbnic_q_triad {
>  	struct fbnic_ring sub0, sub1, cmpl;
> +	struct xdp_rxq_info xdp_rxq;
>  };
>  
>  struct fbnic_napi_vector {
> -- 
> 2.47.1
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ