[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <555eee6a7f6661b12de7bd5373e7835a0dc43b65.camel@calian.com>
Date: Fri, 29 Apr 2022 23:09:00 +0000
From: Robert Hancock <robert.hancock@...ian.com>
To: "netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC: "nicolas.ferre@...rochip.com" <nicolas.ferre@...rochip.com>,
"davem@...emloft.net" <davem@...emloft.net>,
"claudiu.beznea@...rochip.com" <claudiu.beznea@...rochip.com>,
"kuba@...nel.org" <kuba@...nel.org>,
"pabeni@...hat.com" <pabeni@...hat.com>,
"edumazet@...gle.com" <edumazet@...gle.com>,
"tomas.melin@...sala.com" <tomas.melin@...sala.com>
Subject: Re: [PATCH net-next 2/2] net: macb: use NAPI for TX completion path
On Fri, 2022-04-29 at 16:31 -0600, Robert Hancock wrote:
> This driver was using the TX IRQ handler to perform all TX completion
> tasks. Under heavy TX network load, this can cause significant irqs-off
> latencies (found to be in the hundreds of microseconds using ftrace).
> This can cause other issues, such as overrunning serial UART FIFOs when
> using high baud rates with limited UART FIFO sizes.
>
> Switch to using the NAPI poll handler to perform the TX completion work
> to get this out of hard IRQ context and avoid the IRQ latency impact.
>
> The TX Used Bit Read interrupt (TXUBR) handling also needs to be moved
> into the NAPI poll handler to maintain the proper order of operations. A
> flag is used to notify the poll handler that a UBR condition needs to be
> handled. The macb_tx_restart handler has had some locking added for global
> register access, since this could now potentially happen concurrently on
> different queues.
>
> Signed-off-by: Robert Hancock <robert.hancock@...ian.com>
> ---
> drivers/net/ethernet/cadence/macb.h | 1 +
> drivers/net/ethernet/cadence/macb_main.c | 138 +++++++++++++----------
> 2 files changed, 80 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.h
> b/drivers/net/ethernet/cadence/macb.h
> index f0a7d8396a4a..5355cef95a9b 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -1209,6 +1209,7 @@ struct macb_queue {
> struct macb_tx_skb *tx_skb;
> dma_addr_t tx_ring_dma;
> struct work_struct tx_error_task;
> + bool txubr_pending;
>
> dma_addr_t rx_ring_dma;
> dma_addr_t rx_buffers_dma;
> diff --git a/drivers/net/ethernet/cadence/macb_main.c
> b/drivers/net/ethernet/cadence/macb_main.c
> index 160dc5ad84ae..1cb8afb8ef0d 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -959,7 +959,7 @@ static int macb_halt_tx(struct macb *bp)
> return -ETIMEDOUT;
> }
>
> -static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb)
> +static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int
> budget)
> {
> if (tx_skb->mapping) {
> if (tx_skb->mapped_as_page)
> @@ -972,7 +972,7 @@ static void macb_tx_unmap(struct macb *bp, struct
> macb_tx_skb *tx_skb)
> }
>
> if (tx_skb->skb) {
> - dev_kfree_skb_any(tx_skb->skb);
> + napi_consume_skb(tx_skb->skb, budget);
> tx_skb->skb = NULL;
> }
> }
> @@ -1025,12 +1025,13 @@ static void macb_tx_error_task(struct work_struct
> *work)
> (unsigned int)(queue - bp->queues),
> queue->tx_tail, queue->tx_head);
>
> - /* Prevent the queue IRQ handlers from running: each of them may call
> - * macb_tx_interrupt(), which in turn may call netif_wake_subqueue().
> + /* Prevent the queue NAPI poll from running, as it calls
> + * macb_tx_complete(), which in turn may call netif_wake_subqueue().
> * As explained below, we have to halt the transmission before updating
> * TBQP registers so we call netif_tx_stop_all_queues() to notify the
> * network engine about the macb/gem being halted.
> */
> + napi_disable(&queue->napi);
> spin_lock_irqsave(&bp->lock, flags);
>
> /* Make sure nobody is trying to queue up new packets */
> @@ -1058,7 +1059,7 @@ static void macb_tx_error_task(struct work_struct
> *work)
> if (ctrl & MACB_BIT(TX_USED)) {
> /* skb is set for the last buffer of the frame */
> while (!skb) {
> - macb_tx_unmap(bp, tx_skb);
> + macb_tx_unmap(bp, tx_skb, 0);
> tail++;
> tx_skb = macb_tx_skb(queue, tail);
> skb = tx_skb->skb;
> @@ -1088,7 +1089,7 @@ static void macb_tx_error_task(struct work_struct
> *work)
> desc->ctrl = ctrl | MACB_BIT(TX_USED);
> }
>
> - macb_tx_unmap(bp, tx_skb);
> + macb_tx_unmap(bp, tx_skb, 0);
> }
>
> /* Set end of TX queue */
> @@ -1118,25 +1119,28 @@ static void macb_tx_error_task(struct work_struct
> *work)
> macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
>
> spin_unlock_irqrestore(&bp->lock, flags);
> + napi_enable(&queue->napi);
> }
>
> -static void macb_tx_interrupt(struct macb_queue *queue)
> +static bool macb_tx_complete_pending(struct macb_queue *queue)
> +{
> + if (queue->tx_head != queue->tx_tail) {
> + /* Make hw descriptor updates visible to CPU */
> + rmb();
> +
> + if (macb_tx_desc(queue, queue->tx_tail)->ctrl &
> MACB_BIT(TX_USED))
> + return true;
> + }
> + return false;
> +}
> +
> +static void macb_tx_complete(struct macb_queue *queue, int budget)
> {
> unsigned int tail;
> unsigned int head;
> - u32 status;
> struct macb *bp = queue->bp;
> u16 queue_index = queue - bp->queues;
>
> - status = macb_readl(bp, TSR);
> - macb_writel(bp, TSR, status);
> -
> - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> - queue_writel(queue, ISR, MACB_BIT(TCOMP));
> -
> - netdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n",
> - (unsigned long)status);
> -
> head = queue->tx_head;
> for (tail = queue->tx_tail; tail != head; tail++) {
> struct macb_tx_skb *tx_skb;
> @@ -1182,7 +1186,7 @@ static void macb_tx_interrupt(struct macb_queue *queue)
> }
>
> /* Now we can safely release resources */
> - macb_tx_unmap(bp, tx_skb);
> + macb_tx_unmap(bp, tx_skb, budget);
>
> /* skb is set only for the last buffer of the frame.
> * WARNING: at this point skb has been freed by
> @@ -1569,23 +1573,55 @@ static bool macb_rx_pending(struct macb_queue *queue)
> return (desc->addr & MACB_BIT(RX_USED)) != 0;
> }
>
> +static void macb_tx_restart(struct macb_queue *queue)
> +{
> + unsigned int head = queue->tx_head;
> + unsigned int tail = queue->tx_tail;
> + struct macb *bp = queue->bp;
> + unsigned int head_idx, tbqp;
> +
> + if (head == tail)
> + return;
> +
> + tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
> + tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
> + head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head));
> +
> + if (tbqp == head_idx)
> + return;
> +
> + spin_lock_irq(&bp->lock);
> + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
> + spin_unlock_irq(&bp->lock);
> +}
> +
> static int macb_poll(struct napi_struct *napi, int budget)
> {
> struct macb_queue *queue = container_of(napi, struct macb_queue, napi);
> struct macb *bp = queue->bp;
> int work_done;
>
> + macb_tx_complete(queue, budget);
> +
> + rmb(); // ensure txubr_pending is up to date
> + if (queue->txubr_pending) {
> + queue->txubr_pending = false;
> + netdev_vdbg(bp->dev, "poll: tx restart\n");
> + macb_tx_restart(queue);
> + }
> +
Thinking about this a bit more, I wonder if we could just do this tx_restart
call unconditionally here? Then we wouldn't need this txubr_pending flag at
all. CCing Tomas Melin who worked on this tx_restart code recently.
> work_done = bp->macbgem_ops.mog_rx(queue, napi, budget);
>
> netdev_vdbg(bp->dev, "poll: queue = %u, work_done = %d, budget = %d\n",
> (unsigned int)(queue - bp->queues), work_done, budget);
>
> if (work_done < budget && napi_complete_done(napi, work_done)) {
> - queue_writel(queue, IER, bp->rx_intr_mask);
> + queue_writel(queue, IER, bp->rx_intr_mask |
> + MACB_BIT(TCOMP));
>
> /* Packet completions only seem to propagate to raise
> * interrupts when interrupts are enabled at the time, so if
> - * packets were received while interrupts were disabled,
> + * packets were sent/received while interrupts were disabled,
> * they will not cause another interrupt to be generated when
> * interrupts are re-enabled.
> * Check for this case here to avoid losing a wakeup. This can
> @@ -1593,10 +1629,13 @@ static int macb_poll(struct napi_struct *napi, int
> budget)
> * actions if an interrupt is raised just after enabling them,
> * but this should be harmless.
> */
> - if (macb_rx_pending(queue)) {
> - queue_writel(queue, IDR, bp->rx_intr_mask);
> + if (macb_rx_pending(queue) ||
> + macb_tx_complete_pending(queue)) {
> + queue_writel(queue, IDR, bp->rx_intr_mask |
> + MACB_BIT(TCOMP));
> if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> - queue_writel(queue, ISR, MACB_BIT(RCOMP));
> + queue_writel(queue, ISR, MACB_BIT(RCOMP) |
> + MACB_BIT(TCOMP));
> netdev_vdbg(bp->dev, "poll: packets pending,
> reschedule\n");
> napi_schedule(napi);
> }
> @@ -1646,29 +1685,6 @@ static void macb_hresp_error_task(struct
> tasklet_struct *t)
> netif_tx_start_all_queues(dev);
> }
>
> -static void macb_tx_restart(struct macb_queue *queue)
> -{
> - unsigned int head = queue->tx_head;
> - unsigned int tail = queue->tx_tail;
> - struct macb *bp = queue->bp;
> - unsigned int head_idx, tbqp;
> -
> - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> - queue_writel(queue, ISR, MACB_BIT(TXUBR));
> -
> - if (head == tail)
> - return;
> -
> - tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
> - tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
> - head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head));
> -
> - if (tbqp == head_idx)
> - return;
> -
> - macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
> -}
> -
> static irqreturn_t macb_wol_interrupt(int irq, void *dev_id)
> {
> struct macb_queue *queue = dev_id;
> @@ -1754,19 +1770,29 @@ static irqreturn_t macb_interrupt(int irq, void
> *dev_id)
> (unsigned int)(queue - bp->queues),
> (unsigned long)status);
>
> - if (status & bp->rx_intr_mask) {
> - /* There's no point taking any more interrupts
> - * until we have processed the buffers. The
> + if (status & (bp->rx_intr_mask |
> + MACB_BIT(TCOMP) |
> + MACB_BIT(TXUBR))) {
> + /* There's no point taking any more RX/TX completion
> + * interrupts until we have processed the buffers. The
> * scheduling call may fail if the poll routine
> * is already scheduled, so disable interrupts
> * now.
> */
> - queue_writel(queue, IDR, bp->rx_intr_mask);
> + queue_writel(queue, IDR, bp->rx_intr_mask |
> + MACB_BIT(TCOMP));
> if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> - queue_writel(queue, ISR, MACB_BIT(RCOMP));
> + queue_writel(queue, ISR, MACB_BIT(RCOMP) |
> + MACB_BIT(TCOMP) |
> + MACB_BIT(TXUBR));
> +
> + if (status & MACB_BIT(TXUBR)) {
> + queue->txubr_pending = true;
> + wmb(); // ensure softirq can see update
> + }
>
> if (napi_schedule_prep(&queue->napi)) {
> - netdev_vdbg(bp->dev, "scheduling RX
> softirq\n");
> + netdev_vdbg(bp->dev, "scheduling NAPI
> softirq\n");
> __napi_schedule(&queue->napi);
> }
> }
> @@ -1781,12 +1807,6 @@ static irqreturn_t macb_interrupt(int irq, void
> *dev_id)
> break;
> }
>
> - if (status & MACB_BIT(TCOMP))
> - macb_tx_interrupt(queue);
> -
> - if (status & MACB_BIT(TXUBR))
> - macb_tx_restart(queue);
> -
> /* Link change detection isn't possible with RMII, so we'll
> * add that if/when we get our hands on a full-blown MII PHY.
> */
> @@ -2019,7 +2039,7 @@ static unsigned int macb_tx_map(struct macb *bp,
> for (i = queue->tx_head; i != tx_head; i++) {
> tx_skb = macb_tx_skb(queue, i);
>
> - macb_tx_unmap(bp, tx_skb);
> + macb_tx_unmap(bp, tx_skb, 0);
> }
>
> return 0;
--
Robert Hancock
Senior Hardware Designer, Calian Advanced Technologies
www.calian.com
Powered by blists - more mailing lists