[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <SA1PR02MB856018755A47967B5842A4C4C7C19@SA1PR02MB8560.namprd02.prod.outlook.com>
Date: Mon, 2 May 2022 19:30:51 +0000
From: Radhey Shyam Pandey <radheys@...inx.com>
To: Robert Hancock <robert.hancock@...ian.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC: "davem@...emloft.net" <davem@...emloft.net>,
"edumazet@...gle.com" <edumazet@...gle.com>,
"kuba@...nel.org" <kuba@...nel.org>,
"pabeni@...hat.com" <pabeni@...hat.com>,
Michal Simek <michals@...inx.com>,
"linux-arm-kernel@...ts.infradead.org"
<linux-arm-kernel@...ts.infradead.org>,
Harini Katakam <harinik@...inx.com>
Subject: RE: [PATCH net-next] net: axienet: Use NAPI for TX completion path
> -----Original Message-----
> From: Robert Hancock <robert.hancock@...ian.com>
> Sent: Saturday, April 30, 2022 3:59 AM
> To: netdev@...r.kernel.org
> Cc: Radhey Shyam Pandey <radheys@...inx.com>; davem@...emloft.net;
> edumazet@...gle.com; kuba@...nel.org; pabeni@...hat.com; Michal Simek
> <michals@...inx.com>; linux-arm-kernel@...ts.infradead.org; Robert Hancock
> <robert.hancock@...ian.com>
> Subject: [PATCH net-next] net: axienet: Use NAPI for TX completion path
>
> This driver was using the TX IRQ handler to perform all TX completion
> tasks. Under heavy TX network load, this can cause significant irqs-off
> latencies (found to be in the hundreds of microseconds using ftrace).
> This can cause other issues, such as overrunning serial UART FIFOs when
> using high baud rates with limited UART FIFO sizes.
>
> Switch to using the NAPI poll handler to perform the TX completion work
> to get this out of hard IRQ context and avoid the IRQ latency impact.
Thanks for the patch. I assume for simulating heavy network load we
are using netperf/iperf. Do we have some details on the benchmark
before and after adding TX NAPI? I want to see the impact on
throughput.
>
> Signed-off-by: Robert Hancock <robert.hancock@...ian.com>
> ---
> drivers/net/ethernet/xilinx/xilinx_axienet.h | 2 +
> .../net/ethernet/xilinx/xilinx_axienet_main.c | 56 ++++++++++++-------
> 2 files changed, 37 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> index d5c1e5c4a508..6e58d034fe90 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> @@ -397,6 +397,7 @@ struct axidma_bd {
> * @regs: Base address for the axienet_local device address space
> * @dma_regs: Base address for the axidma device address space
> * @rx_dma_cr: Nominal content of RX DMA control register
> + * @tx_dma_cr: Nominal content of TX DMA control register
> * @dma_err_task: Work structure to process Axi DMA errors
> * @tx_irq: Axidma TX IRQ number
> * @rx_irq: Axidma RX IRQ number
> @@ -454,6 +455,7 @@ struct axienet_local {
> void __iomem *dma_regs;
>
> u32 rx_dma_cr;
> + u32 tx_dma_cr;
>
> struct work_struct dma_err_task;
>
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index d6fc3f7acdf0..a52e616275e4 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -254,8 +254,6 @@ static u32 axienet_usec_to_timer(struct axienet_local
> *lp, u32 coalesce_usec)
> */
> static void axienet_dma_start(struct axienet_local *lp)
> {
> - u32 tx_cr;
> -
> /* Start updating the Rx channel control register */
> lp->rx_dma_cr = (lp->coalesce_count_rx <<
> XAXIDMA_COALESCE_SHIFT) |
> XAXIDMA_IRQ_IOC_MASK |
> XAXIDMA_IRQ_ERROR_MASK;
> @@ -269,16 +267,16 @@ static void axienet_dma_start(struct axienet_local
> *lp)
> axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
>
> /* Start updating the Tx channel control register */
> - tx_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) |
> - XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK;
> + lp->tx_dma_cr = (lp->coalesce_count_tx <<
> XAXIDMA_COALESCE_SHIFT) |
> + XAXIDMA_IRQ_IOC_MASK |
> XAXIDMA_IRQ_ERROR_MASK;
> /* Only set interrupt delay timer if not generating an interrupt on
> * the first TX packet. Otherwise leave at 0 to disable delay interrupt.
> */
> if (lp->coalesce_count_tx > 1)
> - tx_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx)
> - << XAXIDMA_DELAY_SHIFT) |
> - XAXIDMA_IRQ_DELAY_MASK;
> - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr);
> + lp->tx_dma_cr |= (axienet_usec_to_timer(lp, lp-
> >coalesce_usec_tx)
> + << XAXIDMA_DELAY_SHIFT) |
> + XAXIDMA_IRQ_DELAY_MASK;
> + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
>
> /* Populate the tail pointer and bring the Rx Axi DMA engine out of
> * halted state. This will make the Rx side ready for reception.
> @@ -294,8 +292,8 @@ static void axienet_dma_start(struct axienet_local *lp)
> * tail pointer register that the Tx channel will start transmitting.
> */
> axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp-
> >tx_bd_p);
> - tx_cr |= XAXIDMA_CR_RUNSTOP_MASK;
> - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr);
> + lp->tx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK;
> + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
> }
>
> /**
> @@ -671,13 +669,14 @@ static int axienet_device_reset(struct net_device
> *ndev)
> * @nr_bds: Number of descriptors to clean up, can be -1 if unknown.
> * @sizep: Pointer to a u32 filled with the total sum of all bytes
> * in all cleaned-up descriptors. Ignored if NULL.
> + * @budget: NAPI budget (use 0 when not called from NAPI poll)
> *
> * Would either be called after a successful transmit operation, or after
> * there was an error when setting up the chain.
> * Returns the number of descriptors handled.
> */
> static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
> - int nr_bds, u32 *sizep)
> + int nr_bds, u32 *sizep, int budget)
> {
> struct axienet_local *lp = netdev_priv(ndev);
> struct axidma_bd *cur_p;
> @@ -707,7 +706,7 @@ static int axienet_free_tx_chain(struct net_device
> *ndev, u32 first_bd,
> DMA_TO_DEVICE);
>
> if (cur_p->skb && (status &
> XAXIDMA_BD_STS_COMPLETE_MASK))
> - dev_consume_skb_irq(cur_p->skb);
> + napi_consume_skb(cur_p->skb, budget);
>
> cur_p->app0 = 0;
> cur_p->app1 = 0;
> @@ -756,20 +755,24 @@ static inline int axienet_check_tx_bd_space(struct
> axienet_local *lp,
> * axienet_start_xmit_done - Invoked once a transmit is completed by the
> * Axi DMA Tx channel.
> * @ndev: Pointer to the net_device structure
> + * @budget: NAPI budget
> *
> - * This function is invoked from the Axi DMA Tx isr to notify the completion
> + * This function is invoked from the NAPI processing to notify the completion
> * of transmit operation. It clears fields in the corresponding Tx BDs and
> * unmaps the corresponding buffer so that CPU can regain ownership of the
> * buffer. It finally invokes "netif_wake_queue" to restart transmission if
> * required.
> */
> -static void axienet_start_xmit_done(struct net_device *ndev)
> +static void axienet_start_xmit_done(struct net_device *ndev, int budget)
> {
> struct axienet_local *lp = netdev_priv(ndev);
> u32 packets = 0;
> u32 size = 0;
>
> - packets = axienet_free_tx_chain(ndev, lp->tx_bd_ci, -1, &size);
> + packets = axienet_free_tx_chain(ndev, lp->tx_bd_ci, -1, &size, budget);
> +
> + if (!packets)
> + return;
>
> lp->tx_bd_ci += packets;
> if (lp->tx_bd_ci >= lp->tx_bd_num)
> @@ -865,7 +868,7 @@ axienet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> netdev_err(ndev, "TX DMA mapping error\n");
> ndev->stats.tx_dropped++;
> axienet_free_tx_chain(ndev, orig_tail_ptr, ii + 1,
> - NULL);
> + NULL, 0);
> lp->tx_bd_tail = orig_tail_ptr;
>
> return NETDEV_TX_OK;
> @@ -899,9 +902,9 @@ axienet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> }
>
> /**
> - * axienet_poll - Triggered by RX ISR to complete the received BD processing.
> + * axienet_poll - Triggered by RX/TX ISR to complete the BD processing.
> * @napi: Pointer to NAPI structure.
> - * @budget: Max number of packets to process.
> + * @budget: Max number of RX packets to process.
> *
> * Return: Number of RX packets processed.
> */
> @@ -916,6 +919,8 @@ static int axienet_poll(struct napi_struct *napi, int
> budget)
> struct sk_buff *skb, *new_skb;
> struct axienet_local *lp = container_of(napi, struct axienet_local,
> napi);
>
> + axienet_start_xmit_done(lp->ndev, budget);
> +
> cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
>
> while (packets < budget && (cur_p->status &
> XAXIDMA_BD_STS_COMPLETE_MASK)) {
> @@ -1001,11 +1006,12 @@ static int axienet_poll(struct napi_struct *napi, int
> budget)
> axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET,
> tail_p);
>
> if (packets < budget && napi_complete_done(napi, packets)) {
> - /* Re-enable RX completion interrupts. This should
> - * cause an immediate interrupt if any RX packets are
> + /* Re-enable RX/TX completion interrupts. This should
> + * cause an immediate interrupt if any RX/TX packets are
> * already pending.
> */
> axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp-
> >rx_dma_cr);
> + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp-
> >tx_dma_cr);
> }
> return packets;
> }
> @@ -1040,7 +1046,15 @@ static irqreturn_t axienet_tx_irq(int irq, void
> *_ndev)
> (lp->tx_bd_v[lp->tx_bd_ci]).phys);
> schedule_work(&lp->dma_err_task);
> } else {
> - axienet_start_xmit_done(lp->ndev);
> + /* Disable further TX completion interrupts and schedule
> + * NAPI to handle the completions.
> + */
> + u32 cr = lp->tx_dma_cr;
> +
> + cr &= ~(XAXIDMA_IRQ_IOC_MASK |
> XAXIDMA_IRQ_DELAY_MASK);
> + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
> +
> + napi_schedule(&lp->napi);
> }
>
> return IRQ_HANDLED;
> --
> 2.31.1
Powered by blists - more mailing lists