Message-ID: <20220405211230.4a1a868d@kernel.org>
Date:   Tue, 5 Apr 2022 21:12:30 -0700
From:   Jakub Kicinski <kuba@...nel.org>
To:     Horatiu Vultur <horatiu.vultur@...rochip.com>
Cc:     <linux-kernel@...r.kernel.org>, <netdev@...r.kernel.org>,
        <UNGLinuxDriver@...rochip.com>, <davem@...emloft.net>,
        <pabeni@...hat.com>, <michael@...le.cc>
Subject: Re: [PATCH net-next v3 3/4] net: lan966x: Add FDMA functionality

On Mon, 4 Apr 2022 15:06:54 +0200 Horatiu Vultur wrote:
> Ethernet frames can be extracted from or injected into the device's
> DDR memory. There is one channel for injection and one channel for
> extraction. Each of these channels contains a linked list of DCBs,
> and each DCB contains DBs. A DCB contains only one DB for both
> injection and extraction. Each DB contains a frame. Every time a
> frame is received or transmitted an interrupt is generated.
> 
> It is not possible to use both FDMA and manual injection/extraction
> of frames at the same time, so FDMA takes priority over the manual
> mode because of its better performance.
> 
> FDMA:
> iperf -c 192.168.1.1
> [  5]   0.00-10.02  sec   420 MBytes   352 Mbits/sec    0 sender
> [  5]   0.00-10.03  sec   420 MBytes   351 Mbits/sec      receiver
> 
> iperf -c 192.168.1.1 -R
> [  5]   0.00-10.01  sec   528 MBytes   442 Mbits/sec    0 sender
> [  5]   0.00-10.00  sec   524 MBytes   440 Mbits/sec      receiver
> 
> Manual:
> iperf -c 192.168.1.1
> [  5]   0.00-10.02  sec  93.8 MBytes  78.5 Mbits/sec    0 sender
> [  5]   0.00-10.03  sec  93.8 MBytes  78.4 Mbits/sec      receiver
> 
> iperf -c 192.168.1.1 -R
> [  5]   0.00-10.03  sec   121 MBytes   101 Mbits/sec    0 sender
> [  5]   0.00-10.01  sec   118 MBytes  99.0 Mbits/sec      receiver
> 
> Signed-off-by: Horatiu Vultur <horatiu.vultur@...rochip.com>

> +static struct sk_buff *lan966x_fdma_rx_alloc_skb(struct lan966x_rx *rx,
> +						 struct lan966x_db *db)
> +{
> +	struct lan966x *lan966x = rx->lan966x;
> +	struct sk_buff *skb;
> +	dma_addr_t dma_addr;
> +	struct page *page;
> +	void *buff_addr;
> +
> +	page = dev_alloc_pages(rx->page_order);
> +	if (unlikely(!page))
> +		return NULL;
> +
> +	dma_addr = dma_map_page(lan966x->dev, page, 0,
> +				PAGE_SIZE << rx->page_order,
> +				DMA_FROM_DEVICE);
> +	if (unlikely(dma_mapping_error(lan966x->dev, dma_addr)))
> +		goto free_page;
> +
> +	buff_addr = page_address(page);
> +	skb = build_skb(buff_addr, PAGE_SIZE << rx->page_order);

It would be better to call build_skb() after the packet comes in rather
than upfront, that way the skb will be in the CPU cache already when
sent up the stack.
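
A rough sketch of that approach (untested; it reuses the names from this
patch, and the refill path would then have to remember the page instead
of an skb):

	/* refill: only allocate and map a page, no skb yet */
	page = dev_alloc_pages(rx->page_order);
	if (unlikely(!page))
		return NULL;

	dma_addr = dma_map_page(lan966x->dev, page, 0,
				PAGE_SIZE << rx->page_order,
				DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(lan966x->dev, dma_addr))) {
		__free_pages(page, rx->page_order);
		return NULL;
	}
	db->dataptr = dma_addr;

	/* rx/napi path: wrap the page the HW just wrote into an skb */
	dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr,
		       PAGE_SIZE << rx->page_order, DMA_FROM_DEVICE);
	skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order);
	if (likely(skb))
		skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));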

> +	if (unlikely(!skb))
> +		goto unmap_page;
> +
> +	db->dataptr = dma_addr;
> +
> +	return skb;
> +
> +unmap_page:
> +	dma_unmap_page(lan966x->dev, dma_addr,
> +		       PAGE_SIZE << rx->page_order,
> +		       DMA_FROM_DEVICE);
> +
> +free_page:
> +	__free_pages(page, rx->page_order);
> +	return NULL;
> +}

> +static int lan966x_fdma_tx_alloc(struct lan966x_tx *tx)
> +{
> +	struct lan966x *lan966x = tx->lan966x;
> +	struct lan966x_tx_dcb *dcb;
> +	struct lan966x_db *db;
> +	int size;
> +	int i, j;
> +
> +	tx->dcbs_buf = kcalloc(FDMA_DCB_MAX, sizeof(struct lan966x_tx_dcb_buf),
> +			       GFP_ATOMIC);
> +	if (!tx->dcbs_buf)
> +		return -ENOMEM;
> +
> +	/* calculate how many pages are needed to allocate the dcbs */
> +	size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX;
> +	size = ALIGN(size, PAGE_SIZE);
> +	tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma, GFP_ATOMIC);

This function seems to only be called from probe, so GFP_KERNEL
is better.
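
i.e. for the allocation quoted above:

	tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma,
				      GFP_KERNEL);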

> +	if (!tx->dcbs)
> +		goto out;
> +
> +	/* Now for each dcb allocate the db */
> +	for (i = 0; i < FDMA_DCB_MAX; ++i) {
> +		dcb = &tx->dcbs[i];
> +
> +		for (j = 0; j < FDMA_TX_DCB_MAX_DBS; ++j) {
> +			db = &dcb->db[j];
> +			db->dataptr = 0;
> +			db->status = 0;
> +		}
> +
> +		lan966x_fdma_tx_add_dcb(tx, dcb);
> +	}
> +
> +	return 0;
> +
> +out:
> +	kfree(tx->dcbs_buf);
> +	return -ENOMEM;
> +}

> +static void lan966x_fdma_wakeup_netdev(struct lan966x *lan966x)
> +{
> +	struct lan966x_port *port;
> +	int i;
> +
> +	for (i = 0; i < lan966x->num_phys_ports; ++i) {
> +		port = lan966x->ports[i];
> +		if (!port)
> +			continue;
> +
> +		if (netif_queue_stopped(port->dev))
> +			netif_wake_queue(port->dev);
> +	}
> +}
> +
> +static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
> +{
> +	struct lan966x_tx *tx = &lan966x->tx;
> +	struct lan966x_tx_dcb_buf *dcb_buf;
> +	struct lan966x_db *db;
> +	unsigned long flags;
> +	bool clear = false;
> +	int i;
> +
> +	spin_lock_irqsave(&lan966x->tx_lock, flags);
> +	for (i = 0; i < FDMA_DCB_MAX; ++i) {
> +		dcb_buf = &tx->dcbs_buf[i];
> +
> +		if (!dcb_buf->used)
> +			continue;
> +
> +		db = &tx->dcbs[i].db[0];
> +		if (!(db->status & FDMA_DCB_STATUS_DONE))
> +			continue;
> +
> +		dcb_buf->dev->stats.tx_packets++;
> +		dcb_buf->dev->stats.tx_bytes += dcb_buf->skb->len;
> +
> +		dcb_buf->used = false;
> +		dma_unmap_single(lan966x->dev,
> +				 dcb_buf->dma_addr,
> +				 dcb_buf->skb->len,
> +				 DMA_TO_DEVICE);
> +		if (!dcb_buf->ptp)
> +			dev_kfree_skb_any(dcb_buf->skb);
> +
> +		clear = true;
> +	}
> +	spin_unlock_irqrestore(&lan966x->tx_lock, flags);
> +
> +	if (clear)
> +		lan966x_fdma_wakeup_netdev(lan966x);

You may want to keep this call inside the lock so that the tx path
doesn't kick in between unlock and wake and fill up the queues.
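
Roughly (sketch only):

	spin_lock_irqsave(&lan966x->tx_lock, flags);
	for (i = 0; i < FDMA_DCB_MAX; ++i) {
		/* ... reclaim completed DCBs as above ... */
	}

	if (clear)
		lan966x_fdma_wakeup_netdev(lan966x);
	spin_unlock_irqrestore(&lan966x->tx_lock, flags);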

> +}
> +
> +static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx)
> +{
> +	struct lan966x *lan966x = rx->lan966x;
> +	u64 src_port, timestamp;
> +	struct lan966x_db *db;
> +	struct sk_buff *skb;
> +
> +	/* Check if there is any data */
> +	db = &rx->dcbs[rx->dcb_index].db[rx->db_index];
> +	if (unlikely(!(db->status & FDMA_DCB_STATUS_DONE)))
> +		return NULL;
> +
> +	/* Get the received frame and unmap it */
> +	skb = rx->skb[rx->dcb_index][rx->db_index];
> +	dma_unmap_single(lan966x->dev, (dma_addr_t)db->dataptr,
> +			 FDMA_DCB_STATUS_BLOCKL(db->status),
> +			 DMA_FROM_DEVICE);
> +
> +	skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));
> +
> +	lan966x_ifh_get_src_port(skb->data, &src_port);
> +	lan966x_ifh_get_timestamp(skb->data, &timestamp);
> +
> +	WARN_ON(src_port >= lan966x->num_phys_ports);
> +
> +	skb->dev = lan966x->ports[src_port]->dev;
> +	skb_pull(skb, IFH_LEN * sizeof(u32));
> +
> +	if (likely(!(skb->dev->features & NETIF_F_RXFCS)))
> +		skb_trim(skb, skb->len - ETH_FCS_LEN);
> +
> +	lan966x_ptp_rxtstamp(lan966x, skb, timestamp);
> +	skb->protocol = eth_type_trans(skb, skb->dev);
> +
> +	if (lan966x->bridge_mask & BIT(src_port)) {
> +		skb->offload_fwd_mark = 1;
> +
> +		skb_reset_network_header(skb);
> +		if (!lan966x_hw_offload(lan966x, src_port, skb))
> +			skb->offload_fwd_mark = 0;
> +	}
> +
> +	skb->dev->stats.rx_bytes += skb->len;
> +	skb->dev->stats.rx_packets++;
> +
> +	return skb;
> +}
> +
> +static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
> +{
> +	struct lan966x *lan966x = container_of(napi, struct lan966x, napi);
> +	struct lan966x_rx *rx = &lan966x->rx;
> +	int dcb_reload = rx->dcb_index;
> +	struct lan966x_rx_dcb *old_dcb;
> +	struct lan966x_db *db;
> +	struct sk_buff *skb;
> +	int counter = 0;
> +	u64 nextptr;
> +
> +	lan966x_fdma_tx_clear_buf(lan966x, weight);
> +
> +	/* Get all received skb */
> +	while (counter < weight) {
> +		skb = lan966x_fdma_rx_get_frame(rx);
> +		if (!skb)
> +			break;
> +		napi_gro_receive(&lan966x->napi, skb);
> +
> +		rx->skb[rx->dcb_index][rx->db_index] = NULL;
> +
> +		rx->dcb_index++;
> +		rx->dcb_index &= FDMA_DCB_MAX - 1;
> +		counter++;
> +	}
> +
> +	/* Allocate new skbs and map them */
> +	while (dcb_reload != rx->dcb_index) {
> +		db = &rx->dcbs[dcb_reload].db[rx->db_index];
> +		skb = lan966x_fdma_rx_alloc_skb(rx, db);
> +		if (unlikely(!skb))
> +			break;
> +		rx->skb[dcb_reload][rx->db_index] = skb;
> +
> +		old_dcb = &rx->dcbs[dcb_reload];
> +		dcb_reload++;
> +		dcb_reload &= FDMA_DCB_MAX - 1;
> +
> +		nextptr = rx->dma + ((unsigned long)old_dcb -
> +				     (unsigned long)rx->dcbs);
> +		lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr);
> +		lan966x_fdma_rx_reload(rx);
> +	}
> +
> +	if (counter < weight && napi_complete_done(napi, counter))
> +		lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
> +
> +	return counter;
> +}
> +
> +irqreturn_t lan966x_fdma_irq_handler(int irq, void *args)
> +{
> +	struct lan966x *lan966x = args;
> +	u32 db, err, err_type;
> +
> +	db = lan_rd(lan966x, FDMA_INTR_DB);
> +	err = lan_rd(lan966x, FDMA_INTR_ERR);
> +
> +	if (db) {
> +		lan_wr(0, lan966x, FDMA_INTR_DB_ENA);
> +		lan_wr(db, lan966x, FDMA_INTR_DB);
> +
> +		napi_schedule(&lan966x->napi);
> +	}
> +
> +	if (err) {
> +		err_type = lan_rd(lan966x, FDMA_ERRORS);
> +
> +		WARN(1, "Unexpected error: %d, error_type: %d\n", err, err_type);
> +
> +		lan_wr(err, lan966x, FDMA_INTR_ERR);
> +		lan_wr(err_type, lan966x, FDMA_ERRORS);
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx)
> +{
> +	struct lan966x_tx_dcb_buf *dcb_buf;
> +	int i;
> +
> +	for (i = 0; i < FDMA_DCB_MAX; ++i) {
> +		dcb_buf = &tx->dcbs_buf[i];
> +		if (!dcb_buf->used && i != tx->last_in_use)
> +			return i;
> +	}
> +
> +	return -1;
> +}
> +
> +int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> +{
> +	struct lan966x_port *port = netdev_priv(dev);
> +	struct lan966x *lan966x = port->lan966x;
> +	struct lan966x_tx_dcb_buf *next_dcb_buf;
> +	struct lan966x_tx_dcb *next_dcb, *dcb;
> +	struct lan966x_tx *tx = &lan966x->tx;
> +	struct lan966x_db *next_db;
> +	int needed_headroom;
> +	int needed_tailroom;
> +	dma_addr_t dma_addr;
> +	int next_to_use;
> +	int err;
> +
> +	/* Get next index */
> +	next_to_use = lan966x_fdma_get_next_dcb(tx);
> +	if (next_to_use < 0) {
> +		netif_stop_queue(dev);
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	if (skb_put_padto(skb, ETH_ZLEN)) {
> +		dev->stats.tx_dropped++;
> +		return NETDEV_TX_OK;
> +	}
> +
> +	/* skb processing */
> +	needed_headroom = max_t(int, IFH_LEN * sizeof(u32) - skb_headroom(skb), 0);
> +	needed_tailroom = max_t(int, ETH_FCS_LEN - skb_tailroom(skb), 0);
> +	if (needed_headroom || needed_tailroom || skb_header_cloned(skb)) {
> +		err = pskb_expand_head(skb, needed_headroom, needed_tailroom,
> +				       GFP_ATOMIC);
> +		if (unlikely(err)) {
> +			dev->stats.tx_dropped++;
> +			err = NETDEV_TX_OK;
> +			goto release;
> +		}
> +	}
> +
> +	skb_tx_timestamp(skb);

This could move down after the dma mapping, so it's closer to when
the device gets ownership.
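
i.e. something along the lines of (sketch):

	dma_addr = dma_map_single(lan966x->dev, skb->data, skb->len,
				  DMA_TO_DEVICE);
	if (dma_mapping_error(lan966x->dev, dma_addr)) {
		dev->stats.tx_dropped++;
		err = NETDEV_TX_OK;
		goto release;
	}

	/* timestamp right before the HW takes ownership of the skb */
	skb_tx_timestamp(skb);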

> +	skb_push(skb, IFH_LEN * sizeof(u32));
> +	memcpy(skb->data, ifh, IFH_LEN * sizeof(u32));
> +	skb_put(skb, 4);
> +
> +	dma_addr = dma_map_single(lan966x->dev, skb->data, skb->len,
> +				  DMA_TO_DEVICE);
> +	if (dma_mapping_error(lan966x->dev, dma_addr)) {
> +		dev->stats.tx_dropped++;
> +		err = NETDEV_TX_OK;
> +		goto release;
> +	}
> +
> +	/* Setup next dcb */
> +	next_dcb = &tx->dcbs[next_to_use];
> +	next_dcb->nextptr = FDMA_DCB_INVALID_DATA;
> +
> +	next_db = &next_dcb->db[0];
> +	next_db->dataptr = dma_addr;
> +	next_db->status = FDMA_DCB_STATUS_SOF |
> +			  FDMA_DCB_STATUS_EOF |
> +			  FDMA_DCB_STATUS_INTR |
> +			  FDMA_DCB_STATUS_BLOCKO(0) |
> +			  FDMA_DCB_STATUS_BLOCKL(skb->len);
> +
> +	/* Fill up the buffer */
> +	next_dcb_buf = &tx->dcbs_buf[next_to_use];
> +	next_dcb_buf->skb = skb;
> +	next_dcb_buf->dma_addr = dma_addr;
> +	next_dcb_buf->used = true;
> +	next_dcb_buf->ptp = false;
> +	next_dcb_buf->dev = dev;
> +
> +	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
> +	    LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
> +		next_dcb_buf->ptp = true;
> +
> +	if (likely(lan966x->tx.activated)) {
> +		/* Connect current dcb to the next db */
> +		dcb = &tx->dcbs[tx->last_in_use];
> +		dcb->nextptr = tx->dma + (next_to_use *
> +					  sizeof(struct lan966x_tx_dcb));
> +
> +		lan966x_fdma_tx_reload(tx);
> +	} else {
> +		/* Because it is first time, then just activate */
> +		lan966x->tx.activated = true;
> +		lan966x_fdma_tx_activate(tx);
> +	}
> +
> +	/* Move to next dcb because this last in use */
> +	tx->last_in_use = next_to_use;
> +
> +	return NETDEV_TX_OK;
> +
> +release:
> +	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
> +	    LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
> +		lan966x_ptp_txtstamp_release(port, skb);
> +
> +	dev_kfree_skb_any(skb);
> +	return err;
> +}
> +
> +void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev)
> +{
> +	if (lan966x->fdma_ndev)
> +		return;
> +
> +	lan966x->fdma_ndev = dev;
> +	netif_napi_add(dev, &lan966x->napi, lan966x_fdma_napi_poll,
> +		       FDMA_WEIGHT);

Just use NAPI_POLL_WEIGHT. We should remove the last argument to
netif_napi_add() completely, but that's another story.
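
i.e.:

	netif_napi_add(dev, &lan966x->napi, lan966x_fdma_napi_poll,
		       NAPI_POLL_WEIGHT);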

> +	napi_enable(&lan966x->napi);
> +}

> +
> +	if (lan966x->fdma_irq) {
> +		disable_irq(lan966x->fdma_irq);

You don't add any enable_irq() calls; maybe devm_free_irq() is a better
choice?
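
Something like (sketch; assumes lan966x->dev is the device the IRQ was
requested against):

	devm_free_irq(lan966x->dev, lan966x->fdma_irq, lan966x);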

> +		lan966x->fdma_irq = -ENXIO;

Semantics of lan966x->fdma_irq are pretty unclear.
Why is the condition not "> 0" for this block?
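
i.e. if negative values are meant to stand for "no IRQ", presumably:

	if (lan966x->fdma_irq > 0) {
		...
	}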

> +	}
>  }
>  
>  static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
> @@ -790,12 +801,12 @@ static void lan966x_init(struct lan966x *lan966x)
>  	/* Do byte-swap and expect status after last data word
>  	 * Extraction: Mode: manual extraction) | Byte_swap
>  	 */
> -	lan_wr(QS_XTR_GRP_CFG_MODE_SET(1) |
> +	lan_wr(QS_XTR_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) |
>  	       QS_XTR_GRP_CFG_BYTE_SWAP_SET(1),
>  	       lan966x, QS_XTR_GRP_CFG(0));
>  
>  	/* Injection: Mode: manual injection | Byte_swap */
> -	lan_wr(QS_INJ_GRP_CFG_MODE_SET(1) |
> +	lan_wr(QS_INJ_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) |
>  	       QS_INJ_GRP_CFG_BYTE_SWAP_SET(1),
>  	       lan966x, QS_INJ_GRP_CFG(0));
>  
> @@ -1017,6 +1028,17 @@ static int lan966x_probe(struct platform_device *pdev)
>  		lan966x->ptp = 1;
>  	}
>  
> +	lan966x->fdma_irq = platform_get_irq_byname(pdev, "fdma");
> +	if (lan966x->fdma_irq > 0) {
> +		err = devm_request_irq(&pdev->dev, lan966x->fdma_irq,
> +				       lan966x_fdma_irq_handler, 0,
> +				       "fdma irq", lan966x);
> +		if (err)
> +			return dev_err_probe(&pdev->dev, err, "Unable to use fdma irq");
> +
> +		lan966x->fdma = true;
> +	}
> +
>  	/* init switch */
>  	lan966x_init(lan966x);
>  	lan966x_stats_init(lan966x);
> @@ -1055,8 +1077,15 @@ static int lan966x_probe(struct platform_device *pdev)
>  	if (err)
>  		goto cleanup_fdb;
>  
> +	err = lan966x_fdma_init(lan966x);

At a quick read it's unclear why this call is not conditional
on lan966x->fdma?
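
i.e. something like:

	if (lan966x->fdma) {
		err = lan966x_fdma_init(lan966x);
		if (err)
			goto cleanup_ptp;
	}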

> +	if (err)
> +		goto cleanup_ptp;
