lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <VI1PR04MB5807F56500D25ECD20657618F2FF0@VI1PR04MB5807.eurprd04.prod.outlook.com>
Date:   Fri, 20 Nov 2020 18:54:42 +0000
From:   Camelia Alexandra Groza <camelia.groza@....com>
To:     Maciej Fijalkowski <maciej.fijalkowski@...el.com>
CC:     "kuba@...nel.org" <kuba@...nel.org>,
        "brouer@...hat.com" <brouer@...hat.com>,
        "saeed@...nel.org" <saeed@...nel.org>,
        "davem@...emloft.net" <davem@...emloft.net>,
        "Madalin Bucur (OSS)" <madalin.bucur@....nxp.com>,
        Ioana Ciornei <ioana.ciornei@....com>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: RE: [PATCH net-next v3 4/7] dpaa_eth: add XDP_TX support

> -----Original Message-----
> From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
> Sent: Friday, November 20, 2020 01:51
> To: Camelia Alexandra Groza <camelia.groza@....com>
> Cc: kuba@...nel.org; brouer@...hat.com; saeed@...nel.org;
> davem@...emloft.net; Madalin Bucur (OSS)
> <madalin.bucur@....nxp.com>; Ioana Ciornei <ioana.ciornei@....com>;
> netdev@...r.kernel.org
> Subject: Re: [PATCH net-next v3 4/7] dpaa_eth: add XDP_TX support
> 
> On Thu, Nov 19, 2020 at 06:29:33PM +0200, Camelia Groza wrote:
> > Use an xdp_frame structure for managing the frame. Store a backpointer
> to
> > the structure at the start of the buffer before enqueueing. Use the XDP
> > API for freeing the buffer when it returns to the driver on the TX
> > confirmation path.
> 
> Completion path?

The DPAA terminology uses confirmation instead of completion, but the meaning is similar. The dpaa.rst documentation has more details.

> >
> > This approach will be reused for XDP REDIRECT.
> >
> > Acked-by: Madalin Bucur <madalin.bucur@....nxp.com>
> > Signed-off-by: Camelia Groza <camelia.groza@....com>
> > ---
> >  drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 129
> ++++++++++++++++++++++++-
> >  drivers/net/ethernet/freescale/dpaa/dpaa_eth.h |   2 +
> >  2 files changed, 126 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > index 242ed45..cd5f4f6 100644
> > --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > @@ -1130,6 +1130,24 @@ static int dpaa_fq_init(struct dpaa_fq *dpaa_fq,
> bool td_enable)
> >
> >  	dpaa_fq->fqid = qman_fq_fqid(fq);
> >
> > +	if (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
> > +	    dpaa_fq->fq_type == FQ_TYPE_RX_PCD) {
> > +		err = xdp_rxq_info_reg(&dpaa_fq->xdp_rxq, dpaa_fq-
> >net_dev,
> > +				       dpaa_fq->fqid);
> > +		if (err) {
> > +			dev_err(dev, "xdp_rxq_info_reg failed\n");
> 
> Print out the err?

I'll add the err in the next version.

> Also, shouldn't you call qman_destroy_fq() for these error paths?

Yes, for the point of undoing all configurations on the queue. The entire probe is aborted if things fail at this point, so everything is cleaned either way. I'll fix this in the next version.

> > +			return err;
> > +		}
> > +
> > +		err = xdp_rxq_info_reg_mem_model(&dpaa_fq->xdp_rxq,
> > +						 MEM_TYPE_PAGE_ORDER0,
> NULL);
> > +		if (err) {
> > +			dev_err(dev, "xdp_rxq_info_reg_mem_model
> failed\n");
> > +			xdp_rxq_info_unreg(&dpaa_fq->xdp_rxq);
> > +			return err;
> > +		}
> > +	}
> > +
> >  	return 0;
> >  }
> >
> > @@ -1159,6 +1177,10 @@ static int dpaa_fq_free_entry(struct device
> *dev, struct qman_fq *fq)
> >  		}
> >  	}
> >
> > +	if (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
> > +	    dpaa_fq->fq_type == FQ_TYPE_RX_PCD)
> 
> You should call xdp_rxq_info_is_reg() before the unregister below.

I'll add it.

> > +		xdp_rxq_info_unreg(&dpaa_fq->xdp_rxq);
> > +
> >  	qman_destroy_fq(fq);
> >  	list_del(&dpaa_fq->list);
> >
> > @@ -1625,6 +1647,9 @@ static int dpaa_eth_refill_bpools(struct dpaa_priv
> *priv)
> >   *
> >   * Return the skb backpointer, since for S/G frames the buffer containing it
> >   * gets freed here.
> > + *
> > + * No skb backpointer is set when transmitting XDP frames. Cleanup the
> buffer
> > + * and return NULL in this case.
> >   */
> >  static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
> >  					  const struct qm_fd *fd, bool ts)
> > @@ -1636,6 +1661,7 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const
> struct dpaa_priv *priv,
> >  	void *vaddr = phys_to_virt(addr);
> >  	const struct qm_sg_entry *sgt;
> >  	struct dpaa_eth_swbp *swbp;
> > +	struct xdp_frame *xdpf;
> 
> This local variable feels a bit unnecessary.

Sure, I'll remove it.

> >  	struct sk_buff *skb;
> >  	u64 ns;
> >  	int i;
> > @@ -1664,13 +1690,22 @@ static struct sk_buff
> *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
> >  		}
> >  	} else {
> >  		dma_unmap_single(priv->tx_dma_dev, addr,
> > -				 priv->tx_headroom +
> qm_fd_get_length(fd),
> > +				 qm_fd_get_offset(fd) +
> qm_fd_get_length(fd),
> >  				 dma_dir);
> >  	}
> >
> >  	swbp = (struct dpaa_eth_swbp *)vaddr;
> >  	skb = swbp->skb;
> >
> > +	/* No skb backpointer is set when running XDP. An xdp_frame
> > +	 * backpointer is saved instead.
> > +	 */
> > +	if (!skb) {
> > +		xdpf = swbp->xdpf;
> > +		xdp_return_frame(xdpf);
> > +		return NULL;
> > +	}
> > +
> >  	/* DMA unmapping is required before accessing the HW provided
> info */
> >  	if (ts && priv->tx_tstamp &&
> >  	    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
> > @@ -2350,11 +2385,76 @@ static enum qman_cb_dqrr_result
> rx_error_dqrr(struct qman_portal *portal,
> >  	return qman_cb_dqrr_consume;
> >  }
> >
> > +static int dpaa_xdp_xmit_frame(struct net_device *net_dev,
> > +			       struct xdp_frame *xdpf)
> > +{
> > +	struct dpaa_priv *priv = netdev_priv(net_dev);
> > +	struct rtnl_link_stats64 *percpu_stats;
> > +	struct dpaa_percpu_priv *percpu_priv;
> > +	struct dpaa_eth_swbp *swbp;
> > +	struct netdev_queue *txq;
> > +	void *buff_start;
> > +	struct qm_fd fd;
> > +	dma_addr_t addr;
> > +	int err;
> > +
> > +	percpu_priv = this_cpu_ptr(priv->percpu_priv);
> > +	percpu_stats = &percpu_priv->stats;
> > +
> > +	if (xdpf->headroom < DPAA_TX_PRIV_DATA_SIZE) {
> 
> Could you shed some light on DPAA_TX_PRIV_DATA_SIZE usage?

We store information at the start of egress buffers that we use on the confirmation path for cleanup. DPAA_TX_PRIV_DATA_SIZE is the size of this reserved area. We require buffers to have at least this mush space available at the start before transmitting them.

> > +		err = -EINVAL;
> > +		goto out_error;
> > +	}
> > +
> > +	buff_start = xdpf->data - xdpf->headroom;
> > +
> > +	/* Leave empty the skb backpointer at the start of the buffer.
> > +	 * Save the XDP frame for easy cleanup on confirmation.
> > +	 */
> > +	swbp = (struct dpaa_eth_swbp *)buff_start;
> > +	swbp->skb = NULL;
> > +	swbp->xdpf = xdpf;
> > +
> > +	qm_fd_clear_fd(&fd);
> > +	fd.bpid = FSL_DPAA_BPID_INV;
> > +	fd.cmd |= cpu_to_be32(FM_FD_CMD_FCO);
> > +	qm_fd_set_contig(&fd, xdpf->headroom, xdpf->len);
> > +
> > +	addr = dma_map_single(priv->tx_dma_dev, buff_start,
> > +			      xdpf->headroom + xdpf->len,
> > +			      DMA_TO_DEVICE);
> > +	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
> > +		err = -EINVAL;
> > +		goto out_error;
> > +	}
> > +
> > +	qm_fd_addr_set64(&fd, addr);
> > +
> > +	/* Bump the trans_start */
> > +	txq = netdev_get_tx_queue(net_dev, smp_processor_id());
> > +	txq->trans_start = jiffies;
> > +
> > +	err = dpaa_xmit(priv, percpu_stats, smp_processor_id(), &fd);
> 
> So it looks like you don't provide the XDP Tx resources and you share the
> netstack's Tx queues with XDP, if I'm reading this right.
> 
> Please mention it/explain in the cover letter or commit message of this
> patch. Furthermore, I don't see any locking happenning over here?

Yes, that's correct, we don't have dedicated Tx queues for XDP, we share them with the netstack.

We don't need locking at this level. We are a LLTX driver so we don't take the netif_tx_lock. I'll mention this in the cover letter.

> > +	if (err) {
> > +		dma_unmap_single(priv->tx_dma_dev, addr,
> > +				 qm_fd_get_offset(&fd) +
> qm_fd_get_length(&fd),
> > +				 DMA_TO_DEVICE);
> > +		goto out_error;
> > +	}
> > +
> > +	return 0;
> > +
> > +out_error:
> > +	percpu_stats->tx_errors++;
> > +	return err;
> > +}
> > +
> >  static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void
> *vaddr,
> > -			unsigned int *xdp_meta_len)
> > +			struct dpaa_fq *dpaa_fq, unsigned int
> *xdp_meta_len)
> >  {
> >  	ssize_t fd_off = qm_fd_get_offset(fd);
> >  	struct bpf_prog *xdp_prog;
> > +	struct xdp_frame *xdpf;
> >  	struct xdp_buff xdp;
> >  	u32 xdp_act;
> >
> > @@ -2370,7 +2470,8 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv,
> struct qm_fd *fd, void *vaddr,
> >  	xdp.data_meta = xdp.data;
> >  	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
> >  	xdp.data_end = xdp.data + qm_fd_get_length(fd);
> > -	xdp.frame_sz = DPAA_BP_RAW_SIZE;
> > +	xdp.frame_sz = DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE;
> > +	xdp.rxq = &dpaa_fq->xdp_rxq;
> >
> >  	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
> >
> > @@ -2381,6 +2482,22 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv,
> struct qm_fd *fd, void *vaddr,
> >  	case XDP_PASS:
> >  		*xdp_meta_len = xdp.data - xdp.data_meta;
> >  		break;
> > +	case XDP_TX:
> > +		/* We can access the full headroom when sending the frame
> > +		 * back out
> 
> And normally why a piece of headroom is taken away? I probably should
> have
> started from the basic XDP support patch, but if you don't mind, please
> explain it a bit.

I mentioned we require DPAA_TX_PRIV_DATA_SIZE bytes at the start of the buffer in order to make sure we have enough space for our private info.

When setting up the xdp_buff, this area is reserved from the frame size exposed to the XDP program.
 -	xdp.frame_sz = DPAA_BP_RAW_SIZE;
 +	xdp.frame_sz = DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE;

After the XDP_TX verdict, we're sure that DPAA_TX_PRIV_DATA_SIZE bytes at the start of the buffer are free and we can use the full headroom how it suits us, hence the increase of the frame size back to DPAA_BP_RAW_SIZE.

Thanks for all your feedback.

> > +		 */
> > +		xdp.data_hard_start = vaddr;
> > +		xdp.frame_sz = DPAA_BP_RAW_SIZE;
> > +		xdpf = xdp_convert_buff_to_frame(&xdp);
> > +		if (unlikely(!xdpf)) {
> > +			free_pages((unsigned long)vaddr, 0);
> > +			break;
> > +		}
> > +
> > +		if (dpaa_xdp_xmit_frame(priv->net_dev, xdpf))
> > +			xdp_return_frame_rx_napi(xdpf);
> > +
> > +		break;
> >  	default:
> >  		bpf_warn_invalid_xdp_action(xdp_act);
> >  		fallthrough;
> > @@ -2415,6 +2532,7 @@ static enum qman_cb_dqrr_result
> rx_default_dqrr(struct qman_portal *portal,
> >  	u32 fd_status, hash_offset;
> >  	struct qm_sg_entry *sgt;
> >  	struct dpaa_bp *dpaa_bp;
> > +	struct dpaa_fq *dpaa_fq;
> >  	struct dpaa_priv *priv;
> >  	struct sk_buff *skb;
> >  	int *count_ptr;
> > @@ -2423,9 +2541,10 @@ static enum qman_cb_dqrr_result
> rx_default_dqrr(struct qman_portal *portal,
> >  	u32 hash;
> >  	u64 ns;
> >
> > +	dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
> >  	fd_status = be32_to_cpu(fd->status);
> >  	fd_format = qm_fd_get_format(fd);
> > -	net_dev = ((struct dpaa_fq *)fq)->net_dev;
> > +	net_dev = dpaa_fq->net_dev;
> >  	priv = netdev_priv(net_dev);
> >  	dpaa_bp = dpaa_bpid2pool(dq->fd.bpid);
> >  	if (!dpaa_bp)
> > @@ -2494,7 +2613,7 @@ static enum qman_cb_dqrr_result
> rx_default_dqrr(struct qman_portal *portal,
> >
> >  	if (likely(fd_format == qm_fd_contig)) {
> >  		xdp_act = dpaa_run_xdp(priv, (struct qm_fd *)fd, vaddr,
> > -				       &xdp_meta_len);
> > +				       dpaa_fq, &xdp_meta_len);
> >  		if (xdp_act != XDP_PASS) {
> >  			percpu_stats->rx_packets++;
> >  			percpu_stats->rx_bytes += qm_fd_get_length(fd);
> > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > index 94e8613..5c8d52a 100644
> > --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > @@ -68,6 +68,7 @@ struct dpaa_fq {
> >  	u16 channel;
> >  	u8 wq;
> >  	enum dpaa_fq_type fq_type;
> > +	struct xdp_rxq_info xdp_rxq;
> >  };
> >
> >  struct dpaa_fq_cbs {
> > @@ -150,6 +151,7 @@ struct dpaa_buffer_layout {
> >   */
> >  struct dpaa_eth_swbp {
> >  	struct sk_buff *skb;
> > +	struct xdp_frame *xdpf;
> >  };
> >
> >  struct dpaa_priv {
> > --
> > 1.9.1
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ