[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20191030112754.05f04f6d@cakuba.netronome.com>
Date: Wed, 30 Oct 2019 11:27:54 -0700
From: Jakub Kicinski <jakub.kicinski@...ronome.com>
To: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
Cc: davem@...emloft.net,
Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
netdev@...r.kernel.org, nhorman@...hat.com, sassmann@...hat.com,
Tony Nguyen <anthony.l.nguyen@...el.com>,
Andrew Bowers <andrewx.bowers@...el.com>
Subject: Re: [net-next 3/9] ice: Add support for XDP
On Tue, 29 Oct 2019 20:29:04 -0700, Jeff Kirsher wrote:
> From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
>
> Add support for XDP. Implement ndo_bpf and ndo_xdp_xmit. Upon load of
> an XDP program, allocate additional Tx rings for dedicated XDP use.
> The following actions are supported: XDP_TX, XDP_DROP, XDP_REDIRECT,
> XDP_PASS, and XDP_ABORTED.
>
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@...el.com>
> Tested-by: Andrew Bowers <andrewx.bowers@...el.com>
> +/**
> + * ice_xdp_setup_prog - Add or remove XDP eBPF program
> + * @vsi: VSI to setup XDP for
> + * @prog: XDP program
> + * @extack: netlink extended ack
> + */
> +static int
> +ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
> + struct netlink_ext_ack *extack)
> +{
> + int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
> + bool if_running = netif_running(vsi->netdev);
> + struct bpf_prog *old_prog;
> + int i, ret = 0;
> +
> + if (frame_size > vsi->rx_buf_len) {
> + NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
> + return -ENOTSUPP;
s/ENOTSUPP/EOPNOTSUPP/ everywhere
> + }
> +
> + if (!ice_is_xdp_ena_vsi(vsi) && !prog)
> + return 0;
This is handled by the core these days.
> + /* need to stop netdev while setting up the program for Rx rings */
> + if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
> + ret = ice_down(vsi);
> + if (ret) {
> + NL_SET_ERR_MSG_MOD(extack,
> + "Preparing device for XDP attach failed");
> + goto skip_setting_prog;
> + }
> + }
> +
> + if (!ice_is_xdp_ena_vsi(vsi) && prog) {
> + vsi->num_xdp_txq = vsi->alloc_txq;
> + vsi->xdp_mapping_mode = ICE_VSI_MAP_CONTIG;
> + if (ice_prepare_xdp_rings(vsi)) {
> + NL_SET_ERR_MSG_MOD(extack,
> + "Setting up XDP Tx resources failed");
> + ret = -ENOMEM;
> + goto skip_setting_prog;
> + }
> + } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
> + if (ice_destroy_xdp_rings(vsi)) {
> + NL_SET_ERR_MSG_MOD(extack,
> + "Freeing XDP Tx resources failed");
> + ret = -ENOMEM;
> + goto skip_setting_prog;
The failures to destroy are kind of concerning, are you 100% sure
XDP is still operational if destroy fails?
> + }
> + }
> +
> + old_prog = xchg(&vsi->xdp_prog, prog);
> + if (old_prog)
> + bpf_prog_put(old_prog);
> +
> + ice_for_each_rxq(vsi, i)
> + WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
> +
> + if (if_running)
> + ret = ice_up(vsi);
> +
> +skip_setting_prog:
> + return ret;
If this is not expanded by later patches and you don't have any other
code to run, just return directly without the gotos please.
Actually if ice_down(vsi) was run before shouldn't the error case do
ice_up(vsi)?
> +}
> +
> +/**
> + * ice_xdp - implements XDP handler
> + * @dev: netdevice
> + * @xdp: XDP command
> + */
> +static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> +{
> + struct ice_netdev_priv *np = netdev_priv(dev);
> + struct ice_vsi *vsi = np->vsi;
> +
> + if (vsi->type != ICE_VSI_PF) {
> + NL_SET_ERR_MSG_MOD(xdp->extack,
> + "XDP can be loaded only on PF VSI");
> + return -EINVAL;
> + }
> +
> + switch (xdp->command) {
> + case XDP_SETUP_PROG:
> + return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
> + case XDP_QUERY_PROG:
> + xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
> + return 0;
> + default:
> + NL_SET_ERR_MSG_MOD(xdp->extack, "Unknown XDP command");
Please drop the extack for unknown command, I'm concerned it will leak
out somewhere where it's perfectly fine to ignore unknown commands.
> + return -EINVAL;
> + }
> +}
> +/**
> + * ice_run_xdp - Executes an XDP program on initialized xdp_buff
> + * @rx_ring: Rx ring
> + * @xdp: xdp_buff used as input to the XDP program
> + * @xdp_prog: XDP program to run
> + *
> + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> + */
> +static int
> +ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
> + struct bpf_prog *xdp_prog)
> +{
> + int err, result = ICE_XDP_PASS;
> + struct ice_ring *xdp_ring;
> + u32 act;
> +
> + act = bpf_prog_run_xdp(xdp_prog, xdp);
> + switch (act) {
> + case XDP_PASS:
> + break;
> + case XDP_TX:
> + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
Here you index the XDP ring with rx queue id but below ice_xdp_xmit()
uses smp_processor_id(). That worries me a little. If TX and REDIRECT
happens at the same time and rx->q_index != smp_processor_id() it could
cause trouble, no?
> + result = ice_xmit_xdp_buff(xdp, xdp_ring);
> + break;
> + case XDP_REDIRECT:
> + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
> + result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + /* fallthrough -- not supported action */
> + case XDP_ABORTED:
> + trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
> + /* fallthrough -- handle aborts by dropping frame */
> + case XDP_DROP:
> + result = ICE_XDP_CONSUMED;
> + break;
> + }
> +
> + return result;
> +}
> +
> +/**
> + * ice_xdp_xmit - submit packets to XDP ring for transmission
> + * @dev: netdev
> + * @n: number of XDP frames to be transmitted
> + * @frames: XDP frames to be transmitted
> + * @flags: transmit flags
> + *
> + * Returns number of frames successfully sent. Frames that fail are
> + * free'ed via XDP return API.
> + * For error cases, a negative errno code is returned and no-frames
> + * are transmitted (caller must handle freeing frames).
> + */
> +int
> +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> + u32 flags)
> +{
> + struct ice_netdev_priv *np = netdev_priv(dev);
> + unsigned int queue_index = smp_processor_id();
> + struct ice_vsi *vsi = np->vsi;
> + struct ice_ring *xdp_ring;
> + int drops = 0, i;
> +
> + if (test_bit(__ICE_DOWN, vsi->state))
> + return -ENETDOWN;
> +
> + if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
> + return -ENXIO;
> +
> + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> + return -EINVAL;
> +
> + xdp_ring = vsi->xdp_rings[queue_index];
> + for (i = 0; i < n; i++) {
> + struct xdp_frame *xdpf = frames[i];
> + int err;
> +
> + err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
> + if (err != ICE_XDP_TX) {
> + xdp_return_frame_rx_napi(xdpf);
> + drops++;
> + }
> + }
> +
> + if (unlikely(flags & XDP_XMIT_FLUSH))
> + ice_xdp_ring_update_tail(xdp_ring);
> +
> + return n - drops;
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> index a914e603b2ed..f74ec83faa55 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -22,6 +22,16 @@
> #define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
> #define ICE_MAX_TXQ_PER_TXQG 128
>
> +static inline __le64
> +ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
> +{
> + return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
> + (td_cmd << ICE_TXD_QW1_CMD_S) |
> + (td_offset << ICE_TXD_QW1_OFFSET_S) |
> + ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
> + (td_tag << ICE_TXD_QW1_L2TAG1_S));
> +}
Please move the build_ctob() rename to a separate patch. All changes
which are not directly related to the objective of the patch make it
harder to review.
Powered by blists - more mailing lists