lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190627161816.0000645a@gmail.com>
Date:   Thu, 27 Jun 2019 16:18:16 +0200
From:   Maciej Fijalkowski <maciejromanfijalkowski@...il.com>
To:     Ilias Apalodimas <ilias.apalodimas@...aro.org>
Cc:     netdev@...r.kernel.org, jaswinder.singh@...aro.org,
        ard.biesheuvel@...aro.org, bjorn.topel@...el.com,
        magnus.karlsson@...el.com, brouer@...hat.com, daniel@...earbox.net,
        ast@...nel.org, makita.toshiaki@....ntt.co.jp,
        jakub.kicinski@...ronome.com, john.fastabend@...il.com,
        davem@...emloft.net
Subject: Re: [RFC, PATCH 2/2, net-next] net: netsec: add XDP support

On Tue, 25 Jun 2019 18:06:19 +0300
Ilias Apalodimas <ilias.apalodimas@...aro.org> wrote:

Hi Ilias,

> +/* The current driver only supports 1 Txq, this should run under spin_lock() */
> +static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
> +				struct xdp_frame *xdpf, bool is_ndo)
> +
> +{
> +	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
> +	struct page *page = virt_to_page(xdpf->data);
> +	struct netsec_tx_pkt_ctrl tx_ctrl = {};
> +	struct netsec_desc tx_desc;
> +	dma_addr_t dma_handle;
> +	u16 filled;
> +
> +	if (tx_ring->head >= tx_ring->tail)
> +		filled = tx_ring->head - tx_ring->tail;
> +	else
> +		filled = tx_ring->head + DESC_NUM - tx_ring->tail;
> +
> +	if (DESC_NUM - filled <= 1)
> +		return NETSEC_XDP_CONSUMED;
> +
> +	if (is_ndo) {
> +		/* this is for ndo_xdp_xmit, the buffer needs mapping before
> +		 * sending
> +		 */
> +		dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len,
> +					    DMA_TO_DEVICE);
> +		if (dma_mapping_error(priv->dev, dma_handle))
> +			return NETSEC_XDP_CONSUMED;
> +		tx_desc.buf_type = TYPE_NETSEC_XDP_NDO;
> +	} else {
> +		/* This is the device Rx buffer from page_pool. No need to remap
> +		 * just sync and send it
> +		 */
> +		dma_handle = page_pool_get_dma_addr(page) +
> +			NETSEC_RXBUF_HEADROOM;
> +		dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len,
> +					   DMA_BIDIRECTIONAL);
> +		tx_desc.buf_type = TYPE_NETSEC_XDP_TX;
> +	}
> +	tx_ctrl.cksum_offload_flag = false;
> +	tx_ctrl.tcp_seg_offload_flag = false;
> +	tx_ctrl.tcp_seg_len = 0;

Aren't these three lines redundant? tx_ctrl is already zero-initialized at its
declaration (= {}).

> +
> +	tx_desc.dma_addr = dma_handle;
> +	tx_desc.addr = xdpf->data;
> +	tx_desc.len = xdpf->len;
> +
> +	netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf);
> +
> +	return NETSEC_XDP_TX;
> +}
> +
> +static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> +{
> +	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
> +	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
> +	u32 ret;
> +
> +	if (unlikely(!xdpf))
> +		return NETSEC_XDP_CONSUMED;
> +
> +	spin_lock(&tx_ring->lock);
> +	ret = netsec_xdp_queue_one(priv, xdpf, false);
> +	spin_unlock(&tx_ring->lock);
> +
> +	return ret;
> +}
> +
> +static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> +			  struct xdp_buff *xdp)
> +{
> +	u32 ret = NETSEC_XDP_PASS;
> +	int err;
> +	u32 act;
> +
> +	rcu_read_lock();
> +	act = bpf_prog_run_xdp(prog, xdp);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +		ret = NETSEC_XDP_PASS;
> +		break;
> +	case XDP_TX:
> +		ret = netsec_xdp_xmit_back(priv, xdp);
> +		if (ret != NETSEC_XDP_TX)
> +			xdp_return_buff(xdp);
> +		break;
> +	case XDP_REDIRECT:
> +		err = xdp_do_redirect(priv->ndev, xdp, prog);
> +		if (!err) {
> +			ret = NETSEC_XDP_REDIR;
> +		} else {
> +			ret = NETSEC_XDP_CONSUMED;
> +			xdp_return_buff(xdp);
> +		}
> +		break;
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +		/* fall through */
> +	case XDP_ABORTED:
> +		trace_xdp_exception(priv->ndev, prog, act);
> +		/* fall through -- handle aborts by dropping packet */
> +	case XDP_DROP:
> +		ret = NETSEC_XDP_CONSUMED;
> +		xdp_return_buff(xdp);
> +		break;
> +	}
> +
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
>  static int netsec_process_rx(struct netsec_priv *priv, int budget)
>  {
>  	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> +	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);

Reading the BPF prog should be RCU protected. If you have PREEMPT_RCU=y in your
kernel config, there might be a case where the RCU callback that destroys the
BPF prog is executed during the bottom half handling. I've just rephrased
Brenden's words here, so for further info, see:

https://lore.kernel.org/netdev/20160904042958.8594-1-bblanco@plumgrid.com/

So either expand the RCU section or read the prog pointer for each frame, under
the RCU read lock, as it seems that currently we have these two schemes in
drivers that support XDP.

>  	struct net_device *ndev = priv->ndev;
>  	struct netsec_rx_pkt_info rx_info;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb = NULL;
> +	u16 xdp_xmit = 0;
> +	u32 xdp_act = 0;
>  	int done = 0;
>  
>  	while (done < budget) {
> @@ -727,8 +903,10 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
>  		struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
>  		struct netsec_desc *desc = &dring->desc[idx];
>  		struct page *page = virt_to_page(desc->addr);
> +		u32 xdp_result = XDP_PASS;
>  		u16 pkt_len, desc_len;
>  		dma_addr_t dma_handle;
> +		struct xdp_buff xdp;
>  		void *buf_addr;
>  
>  		if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
> @@ -773,7 +951,23 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
>  					DMA_FROM_DEVICE);
>  		prefetch(desc->addr);
>  
> +		xdp.data_hard_start = desc->addr;
> +		xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
> +		xdp_set_data_meta_invalid(&xdp);
> +		xdp.data_end = xdp.data + pkt_len;
> +		xdp.rxq = &dring->xdp_rxq;
> +
> +		if (xdp_prog) {
> +			xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);
> +			if (xdp_result != NETSEC_XDP_PASS) {
> +				xdp_act |= xdp_result;
> +				if (xdp_result == NETSEC_XDP_TX)
> +					xdp_xmit++;
> +				goto next;
> +			}
> +		}
>  		skb = build_skb(desc->addr, desc->len + NETSEC_RX_BUF_NON_DATA);
> +
>  		if (unlikely(!skb)) {
>  			/* If skb fails recycle_direct will either unmap and
>  			 * free the page or refill the cache depending on the
> @@ -787,27 +981,30 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
>  		}
>  		page_pool_release_page(dring->page_pool, page);
>  
> -		/* Update the descriptor with the new buffer we allocated */
> -		desc->len = desc_len;
> -		desc->dma_addr = dma_handle;
> -		desc->addr = buf_addr;
> -
> -		skb_reserve(skb, NETSEC_SKB_PAD);
> -		skb_put(skb, pkt_len);
> +		skb_reserve(skb, xdp.data - xdp.data_hard_start);
> +		skb_put(skb, xdp.data_end - xdp.data);
>  		skb->protocol = eth_type_trans(skb, priv->ndev);
>  
>  		if (priv->rx_cksum_offload_flag &&
>  		    rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK)
>  			skb->ip_summed = CHECKSUM_UNNECESSARY;
>  
> -		if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) {
> +next:
> +		if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
> +		    xdp_result & NETSEC_XDP_RX_OK) {
>  			ndev->stats.rx_packets++;
> -			ndev->stats.rx_bytes += pkt_len;
> +			ndev->stats.rx_bytes += xdp.data_end - xdp.data;
>  		}
>  
> +		/* Update the descriptor with fresh buffers */
> +		desc->len = desc_len;
> +		desc->dma_addr = dma_handle;
> +		desc->addr = buf_addr;
> +
>  		netsec_rx_fill(priv, idx, 1);
>  		dring->tail = (dring->tail + 1) % DESC_NUM;
>  	}
> +	netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
>  
>  	return done;
>  }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ