[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKv+Gu-AWV94vVThi-YPLWganMgNc=+OUZOG6jeqeLzUOMb+Nw@mail.gmail.com>
Date: Fri, 14 Dec 2018 11:59:00 +0100
From: Ard Biesheuvel <ard.biesheuvel@...aro.org>
To: Ilias Apalodimas <ilias.apalodimas@...aro.org>
Cc: "<netdev@...r.kernel.org>" <netdev@...r.kernel.org>,
Jaswinder Singh <jaswinder.singh@...aro.org>,
Masami Hiramatsu <masami.hiramatsu@...aro.org>,
Masahisa Kojima <masahisa.kojima@...aro.org>
Subject: Re: [net-next, PATCH 2/2] net: socionext: remove mmio reads on Tx
On Fri, 14 Dec 2018 at 09:59, Ilias Apalodimas
<ilias.apalodimas@...aro.org> wrote:
>
> Currently the driver issues 2 mmio reads to figure out the number of
> transmitted packets and clean them. We can get rid of the expensive
> reads since BIT 31 of the Tx descriptor can be used for that.
> We can also remove the budget counting of Tx completions since all of
> the descriptors are not deliberately processed.
>
This last sentence reflects what we discussed off-line, right? That
reaping used Tx descriptors shouldn't be counted against the budget,
but should only occur if the budget is > 0 after processing the Rx
queue?
</ilias.apalodimas@...aro.org>
> Performance numbers using pktgen are:
> size pre-patch(pps) post-patch(pps)
> 64 362483 427916
> 128 358315 411686
> 256 352725 389683
> 512 215675 216464
> 1024 113812 114442
>
> Signed-off-by: Ilias Apalodimas <ilias.apalodimas@...aro.org>
> ---
> drivers/net/ethernet/socionext/netsec.c | 97 ++++++++++++++-----------
> 1 file changed, 54 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
> index 584a6b3f6542..05a0948ad929 100644
> --- a/drivers/net/ethernet/socionext/netsec.c
> +++ b/drivers/net/ethernet/socionext/netsec.c
> @@ -257,7 +257,6 @@ struct netsec_desc_ring {
> dma_addr_t desc_dma;
> struct netsec_desc *desc;
> void *vaddr;
> - u16 pkt_cnt;
> u16 head, tail;
> };
>
> @@ -598,33 +597,26 @@ static void netsec_set_rx_de(struct netsec_priv *priv,
> dring->desc[idx].len = desc->len;
> }
>
> -static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
> +static bool netsec_clean_tx_dring(struct netsec_priv *priv)
> {
> struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
> unsigned int pkts, bytes;
> -
> - dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
> -
> - if (dring->pkt_cnt < budget)
> - budget = dring->pkt_cnt;
> + struct netsec_de *entry;
> + int tail = dring->tail;
> + int cnt = 0;
>
> pkts = 0;
> bytes = 0;
> + entry = dring->vaddr + DESC_SZ * tail;
>
> - while (pkts < budget) {
> + while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) &&
> + cnt < DESC_NUM) {
> struct netsec_desc *desc;
> - struct netsec_de *entry;
> - int tail, eop;
> -
> - tail = dring->tail;
> -
> - /* move tail ahead */
> - dring->tail = (tail + 1) % DESC_NUM;
> + int eop;
>
> desc = &dring->desc[tail];
> - entry = dring->vaddr + DESC_SZ * tail;
> -
> eop = (entry->attr >> NETSEC_TX_LAST) & 1;
> + dma_rmb();
>
> dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
> DMA_TO_DEVICE);
> @@ -633,38 +625,51 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
> bytes += desc->skb->len;
> dev_kfree_skb(desc->skb);
> }
> + /* clean up so netsec_uninit_pkt_dring() won't free the skb
> + * again
> + */
> *desc = (struct netsec_desc){};
> +
> + /* entry->attr is not going to be accessed by the NIC until
> + * netsec_set_tx_de() is called. No need for a dma_wmb() here
> + */
> + entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
> + /* move tail ahead */
> + dring->tail = (tail + 1) % DESC_NUM;
> +
> + tail = dring->tail;
> + entry = dring->vaddr + DESC_SZ * tail;
> + cnt++;
> }
> - dring->pkt_cnt -= budget;
>
> - priv->ndev->stats.tx_packets += budget;
> + if (!cnt)
> + return false;
> +
> + /* reading the register clears the irq */
> + netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
> +
> + priv->ndev->stats.tx_packets += cnt;
> priv->ndev->stats.tx_bytes += bytes;
>
> - netdev_completed_queue(priv->ndev, budget, bytes);
> + netdev_completed_queue(priv->ndev, cnt, bytes);
>
> - return budget;
> + return true;
> }
>
> -static int netsec_process_tx(struct netsec_priv *priv, int budget)
> +static void netsec_process_tx(struct netsec_priv *priv)
> {
> struct net_device *ndev = priv->ndev;
> - int new, done = 0;
> + bool cleaned;
>
> - do {
> - new = netsec_clean_tx_dring(priv, budget);
> - done += new;
> - budget -= new;
> - } while (new);
> + cleaned = netsec_clean_tx_dring(priv);
>
> - if (done && netif_queue_stopped(ndev)) {
> + if (cleaned && netif_queue_stopped(ndev)) {
> /* Make sure we update the value, anyone stopping the queue
> * after this will read the proper consumer idx
> */
> smp_wmb();
> netif_wake_queue(ndev);
> }
> -
> - return done;
> }
>
> static void *netsec_alloc_rx_data(struct netsec_priv *priv,
> @@ -813,24 +818,17 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
> static int netsec_napi_poll(struct napi_struct *napi, int budget)
> {
> struct netsec_priv *priv;
> - int tx, rx, done, todo;
> + int rx, done, todo;
>
> priv = container_of(napi, struct netsec_priv, napi);
>
> + netsec_process_tx(priv);
> +
So shouldn't we do this *after* processing Rx, and only if there is budget left?
> todo = budget;
> do {
> - if (!todo)
> - break;
> -
> - tx = netsec_process_tx(priv, todo);
> - todo -= tx;
> -
> - if (!todo)
> - break;
> -
> rx = netsec_process_rx(priv, todo);
> todo -= rx;
> - } while (rx || tx);
> + } while (rx);
>
> done = budget - todo;
>
> @@ -1007,7 +1005,6 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
>
> dring->head = 0;
> dring->tail = 0;
> - dring->pkt_cnt = 0;
>
> if (id == NETSEC_RING_TX)
> netdev_reset_queue(priv->ndev);
> @@ -1030,6 +1027,7 @@ static void netsec_free_dring(struct netsec_priv *priv, int id)
> static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
> {
> struct netsec_desc_ring *dring = &priv->desc_ring[id];
> + int i;
>
> dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM,
> &dring->desc_dma, GFP_KERNEL);
> @@ -1040,6 +1038,19 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
> if (!dring->desc)
> goto err;
>
> + if (id == NETSEC_RING_TX) {
> + for (i = 0; i < DESC_NUM; i++) {
> + struct netsec_de *de;
> +
> + de = dring->vaddr + (DESC_SZ * i);
> + /* de->attr is not going to be accessed by the NIC
> + * until netsec_set_tx_de() is called.
> + * No need for a dma_wmb() here
> + */
> + de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
> + }
> + }
> +
> return 0;
> err:
> netsec_free_dring(priv, id);
> --
> 2.19.1
>
Powered by blists - more mailing lists