Message-ID: <b25b599e-069e-bf1f-d598-edee732aa30e@pensando.io>
Date:   Fri, 12 Mar 2021 12:01:48 -0800
From:   Shannon Nelson <snelson@...sando.io>
To:     Alexander Duyck <alexander.duyck@...il.com>
Cc:     Netdev <netdev@...r.kernel.org>,
        David Miller <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Pensando Drivers <drivers@...sando.io>
Subject: Re: [PATCH net-next 2/6] ionic: implement Rx page reuse

On 3/10/21 6:14 PM, Alexander Duyck wrote:
> On Wed, Mar 10, 2021 at 11:28 AM Shannon Nelson <snelson@...sando.io> wrote:
>> Rework the Rx buffer allocations to use pages twice when using
>> normal MTU in order to cut down on buffer allocation and mapping
>> overhead.
>>
>> Instead of tracking individual pages, in which we may have
>> wasted half the space when using standard 1500 MTU, we track
>> buffers which use half pages, so we can use the second half
>> of the page rather than allocate and map a new page once the
>> first buffer has been used.
>>
>> Signed-off-by: Shannon Nelson <snelson@...sando.io>
> So looking at the approach taken here, it seems like you are doing a
> linear walk and getting 2 uses per 4K page. If you are
> taking that route it might make more sense to just split the page and
> use both pieces immediately to populate 2 entries instead of waiting
> on the next loop through the ring. Then you could just split the page
> into multiple buffers and fill your sg list using less total pages
> rather than having 2K gaps between your entries. An added advantage
> would be that you could simply merge the page fragments in the event
> that you have something writing to the full 2K buffers and you cannot
> use copybreak.
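For reference, a rough sketch of that split-at-allocation idea might look
something like this (illustrative only, reusing this patch's ionic_buf_info
fields in a hypothetical helper; not code from the patch):

static int ionic_rx_page_split_alloc(struct ionic_queue *q,
                                     struct ionic_buf_info *bufs)
{
        struct device *dev = q->lif->ionic->dev;
        struct page *page;
        dma_addr_t dma_addr;

        page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
        if (unlikely(!page))
                return -ENOMEM;

        dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(dev, dma_addr))) {
                __free_pages(page, 0);
                return -ENOMEM;
        }

        /* one extra reference so each half carries its own ref */
        get_page(page);

        /* hand out both halves of the page immediately */
        bufs[0].page = page;
        bufs[0].page_offset = 0;
        bufs[0].dma_addr = dma_addr;

        bufs[1].page = page;
        bufs[1].page_offset = IONIC_PAGE_SPLIT_SZ;
        bufs[1].dma_addr = dma_addr;

        return 0;
}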

Thanks, Alex, for the detailed review.  I'll work with our internal 
performance guy to fold these comments into an update.
sln


>
>> ---
>>   .../net/ethernet/pensando/ionic/ionic_dev.h   |  12 +-
>>   .../net/ethernet/pensando/ionic/ionic_txrx.c  | 215 +++++++++++-------
>>   2 files changed, 138 insertions(+), 89 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
>> index 690768ff0143..0f877c86eba6 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
>> @@ -170,9 +170,15 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
>>                                struct ionic_desc_info *desc_info,
>>                                struct ionic_cq_info *cq_info, void *cb_arg);
>>
>> -struct ionic_page_info {
>> +#define IONIC_PAGE_SIZE                                PAGE_SIZE
>> +#define IONIC_PAGE_SPLIT_SZ                    (PAGE_SIZE / 2)
> This probably doesn't work out too well when the page size gets up to
> 64K. I don't know of too many networks that support a 32K MTU.. :)
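For illustration only, one way to bound that on 64K-page systems might be
something along these lines (hypothetical, not what this patch does):

#define IONIC_PAGE_SIZE                        min_t(u32, PAGE_SIZE, SZ_16K)
#define IONIC_PAGE_SPLIT_SZ                    (IONIC_PAGE_SIZE / 2)

so the per-buffer size stays reasonable regardless of PAGE_SIZE.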
>
>> +#define IONIC_PAGE_GFP_MASK                    (GFP_ATOMIC | __GFP_NOWARN |\
>> +                                                __GFP_COMP | __GFP_MEMALLOC)
>> +
>> +struct ionic_buf_info {
>>          struct page *page;
>>          dma_addr_t dma_addr;
>> +       u32 page_offset;
>>   };
> I'm not really sure the rename was needed. You are still just working
> with a page aren't you? It would actually reduce the complexity of
> this patch a bunch as you could drop the renaming changes.
>
>>   struct ionic_desc_info {
>> @@ -187,8 +193,8 @@ struct ionic_desc_info {
>>                  struct ionic_txq_sg_desc *txq_sg_desc;
>>                  struct ionic_rxq_sg_desc *rxq_sgl_desc;
>>          };
>> -       unsigned int npages;
>> -       struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
>> +       unsigned int nbufs;
>> +       struct ionic_buf_info bufs[IONIC_RX_MAX_SG_ELEMS + 1];
>>          ionic_desc_cb cb;
>>          void *cb_arg;
>>   };
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>> index 70b997f302ac..3e13cfee9ecd 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>> @@ -54,7 +54,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
>>          if (frags)
>>                  skb = napi_get_frags(&q_to_qcq(q)->napi);
>>          else
>> -               skb = netdev_alloc_skb_ip_align(netdev, len);
>> +               skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
>>
>>          if (unlikely(!skb)) {
>>                  net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
>> @@ -66,8 +66,15 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
>>          return skb;
>>   }
>>
>> +static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
>> +{
>> +       buf_info->page = NULL;
>> +       buf_info->page_offset = 0;
>> +       buf_info->dma_addr = 0;
>> +}
>> +
> Technically speaking you probably only need to reset the page value.
> You could hold off on resetting the page_offset and dma_addr until you
> actually are populating the page.
>
>>   static int ionic_rx_page_alloc(struct ionic_queue *q,
>> -                              struct ionic_page_info *page_info)
>> +                              struct ionic_buf_info *buf_info)
>>   {
>>          struct ionic_lif *lif = q->lif;
>>          struct ionic_rx_stats *stats;
>> @@ -78,26 +85,26 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
>>          dev = lif->ionic->dev;
>>          stats = q_to_rx_stats(q);
>>
>> -       if (unlikely(!page_info)) {
>> -               net_err_ratelimited("%s: %s invalid page_info in alloc\n",
>> +       if (unlikely(!buf_info)) {
>> +               net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
>>                                      netdev->name, q->name);
>>                  return -EINVAL;
>>          }
>>
>> -       page_info->page = dev_alloc_page();
>> -       if (unlikely(!page_info->page)) {
>> +       buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
>> +       if (unlikely(!buf_info->page)) {
>>                  net_err_ratelimited("%s: %s page alloc failed\n",
>>                                      netdev->name, q->name);
>>                  stats->alloc_err++;
>>                  return -ENOMEM;
>>          }
>> +       buf_info->page_offset = 0;
>>
>> -       page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
>> -                                          DMA_FROM_DEVICE);
>> -       if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
>> -               put_page(page_info->page);
>> -               page_info->dma_addr = 0;
>> -               page_info->page = NULL;
>> +       buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
>> +                                         IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
>> +       if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
>> +               __free_pages(buf_info->page, 0);
>> +               ionic_rx_buf_reset(buf_info);
>>                  net_err_ratelimited("%s: %s dma map failed\n",
>>                                      netdev->name, q->name);
>>                  stats->dma_map_err++;
> So one thing I would change about the setup here is that I would not
> store anything in buf_info->page until after you have stored the
> page_offset and dma_addr. That way you know that the other two are
> only valid because a page is present. In addition you can avoid having
> to do the extra cleanup as they should only be read if buf_info->page is
> set.
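A sketch of that ordering, for illustration (not the patch code; assumes
local page and dma_addr variables in ionic_rx_page_alloc()):

        page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
        if (unlikely(!page))
                return -ENOMEM;

        dma_addr = dma_map_page(dev, page, 0, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(dev, dma_addr))) {
                __free_pages(page, 0);
                return -ENOMEM;
        }

        /* page_offset and dma_addr first; setting page last marks the
         * buffer valid, so nothing else needs cleaning up on error
         */
        buf_info->page_offset = 0;
        buf_info->dma_addr = dma_addr;
        buf_info->page = page;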
>
>> @@ -108,32 +115,46 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
>>   }
>>
>>   static void ionic_rx_page_free(struct ionic_queue *q,
>> -                              struct ionic_page_info *page_info)
>> +                              struct ionic_buf_info *buf_info)
>>   {
>> -       struct ionic_lif *lif = q->lif;
>> -       struct net_device *netdev;
>> -       struct device *dev;
>> -
>> -       netdev = lif->netdev;
>> -       dev = lif->ionic->dev;
>> +       struct net_device *netdev = q->lif->netdev;
>> +       struct device *dev = q->lif->ionic->dev;
>>
>> -       if (unlikely(!page_info)) {
>> -               net_err_ratelimited("%s: %s invalid page_info in free\n",
>> +       if (unlikely(!buf_info)) {
>> +               net_err_ratelimited("%s: %s invalid buf_info in free\n",
>>                                      netdev->name, q->name);
>>                  return;
>>          }
>>
>> -       if (unlikely(!page_info->page)) {
>> -               net_err_ratelimited("%s: %s invalid page in free\n",
>> -                                   netdev->name, q->name);
>> +       if (!buf_info->page)
>>                  return;
>> -       }
>>
>> -       dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
>> +       dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
>> +       __free_pages(buf_info->page, 0);
>> +       ionic_rx_buf_reset(buf_info);
>> +}
>> +
>> +static bool ionic_rx_buf_recycle(struct ionic_queue *q,
>> +                                struct ionic_buf_info *buf_info, u32 used)
>> +{
>> +       u32 size;
>> +
>> +       /* don't re-use pages allocated in low-mem condition */
>> +       if (page_is_pfmemalloc(buf_info->page))
>> +               return false;
>> +
>> +       /* don't re-use buffers from non-local numa nodes */
>> +       if (page_to_nid(buf_info->page) != numa_mem_id())
>> +               return false;
>> +
> So I am not sure if either of these is really needed if you are just
> going to be freeing the page after all 4K is consumed. With the Intel
> drivers we were bouncing back and forth between the upper and lower
> halves. With this it looks like you do a linear walk and then exit
> when you have reached the end of the page. With that being the case
> the memory locality check is kind of moot since you will flush the
> page after two uses anyway. In addition the pfmemalloc check might
> also be moot since it may actually be more efficient to reuse the page
> rather than use up the full 4K and attempt to allocate yet another page.
>
>> +       size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
>> +       buf_info->page_offset += size;
>> +       if (buf_info->page_offset >= IONIC_PAGE_SIZE)
>> +               return false;
>> +
>> +       get_page(buf_info->page);
> The get_page per 2K section will add up in terms of cost as it is an
> expensive atomic operation. You might see if you can get away with
> batching it to do one atomic add per allocation instead of one per
> use.
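For illustration, one way to batch it (sketch only, not the patch): take
the extra reference up front in ionic_rx_page_alloc() and drop the per-use
get_page() from the recycle path:

        /* in ionic_rx_page_alloc(): one atomic op covers both halves */
        page_ref_add(buf_info->page,
                     (IONIC_PAGE_SIZE / IONIC_PAGE_SPLIT_SZ) - 1);

        /* ionic_rx_buf_recycle() then only advances the offset; teardown in
         * ionic_rx_page_free() would have to drop the refs for any halves
         * never handed up to the stack.
         */
        buf_info->page_offset += size;
        if (buf_info->page_offset >= IONIC_PAGE_SIZE)
                return false;
        return true;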
>
>> -       put_page(page_info->page);
>> -       page_info->dma_addr = 0;
>> -       page_info->page = NULL;
>> +       return true;
>>   }
>>
>>   static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>> @@ -142,16 +163,16 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>>   {
>>          struct ionic_rxq_comp *comp = cq_info->cq_desc;
>>          struct device *dev = q->lif->ionic->dev;
>> -       struct ionic_page_info *page_info;
>> +       struct ionic_buf_info *buf_info;
>>          struct sk_buff *skb;
>>          unsigned int i;
>>          u16 frag_len;
>>          u16 len;
>>
>> -       page_info = &desc_info->pages[0];
>> +       buf_info = &desc_info->bufs[0];
>>          len = le16_to_cpu(comp->len);
>>
>> -       prefetch(page_address(page_info->page) + NET_IP_ALIGN);
>> +       prefetch(buf_info->page);
> Just want to confirm this is what you meant to do. The old code was
> prefetching the first cache line of the data. The new code is just
> prefetching the page struct. You may want to change this to a
> prefetchw if you are expecting to use this to improve the performance
> for the get_page call.
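For illustration, that would be one of these, depending on the intent
(sketch only):

        /* prefetch the start of the packet data for the coming CPU reads */
        prefetch(page_address(buf_info->page) + buf_info->page_offset);

        /* or warm the struct page itself ahead of the get_page() */
        prefetchw(buf_info->page);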
>
>>          skb = ionic_rx_skb_alloc(q, len, true);
>>          if (unlikely(!skb))
>> @@ -159,7 +180,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>>
>>          i = comp->num_sg_elems + 1;
>>          do {
>> -               if (unlikely(!page_info->page)) {
>> +               if (unlikely(!buf_info->page)) {
>>                          struct napi_struct *napi = &q_to_qcq(q)->napi;
>>
>>                          napi->skb = NULL;
>> @@ -167,15 +188,25 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>>                          return NULL;
>>                  }
>>
>> -               frag_len = min(len, (u16)PAGE_SIZE);
>> +               frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
>>                  len -= frag_len;
>>
>> -               dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
>> -                              PAGE_SIZE, DMA_FROM_DEVICE);
>> +               dma_sync_single_for_cpu(dev,
>> +                                       buf_info->dma_addr + buf_info->page_offset,
>> +                                       frag_len, DMA_FROM_DEVICE);
>> +
>>                  skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
>> -                               page_info->page, 0, frag_len, PAGE_SIZE);
>> -               page_info->page = NULL;
>> -               page_info++;
>> +                               buf_info->page, buf_info->page_offset, frag_len,
>> +                               IONIC_PAGE_SIZE);
>> +
>> +               if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
>> +                       dma_unmap_page(dev, buf_info->dma_addr,
>> +                                      IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
>> +                       ionic_rx_buf_reset(buf_info);
>> +               }
>> +
> If you don't unmap it, don't you need to sync the remaining portion of
> the page for use by the device?
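For illustration, something like the following (sketch only; note that
page_offset has already been advanced past the used half at this point):

        dma_sync_single_for_device(dev,
                                   buf_info->dma_addr + buf_info->page_offset,
                                   IONIC_PAGE_SIZE - buf_info->page_offset,
                                   DMA_FROM_DEVICE);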
>
>> +               buf_info++;
>> +
>>                  i--;
>>          } while (i > 0);
>>
>> @@ -188,26 +219,26 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
>>   {
>>          struct ionic_rxq_comp *comp = cq_info->cq_desc;
>>          struct device *dev = q->lif->ionic->dev;
>> -       struct ionic_page_info *page_info;
>> +       struct ionic_buf_info *buf_info;
>>          struct sk_buff *skb;
>>          u16 len;
>>
>> -       page_info = &desc_info->pages[0];
>> +       buf_info = &desc_info->bufs[0];
>>          len = le16_to_cpu(comp->len);
>>
>>          skb = ionic_rx_skb_alloc(q, len, false);
>>          if (unlikely(!skb))
>>                  return NULL;
>>
>> -       if (unlikely(!page_info->page)) {
>> +       if (unlikely(!buf_info->page)) {
>>                  dev_kfree_skb(skb);
>>                  return NULL;
>>          }
>>
>> -       dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
>> +       dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
>>                                  len, DMA_FROM_DEVICE);
>> -       skb_copy_to_linear_data(skb, page_address(page_info->page), len);
>> -       dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
>> +       skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
>> +       dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
>>                                     len, DMA_FROM_DEVICE);
>>
>>          skb_put(skb, len);
>> @@ -327,64 +358,73 @@ void ionic_rx_fill(struct ionic_queue *q)
>>   {
>>          struct net_device *netdev = q->lif->netdev;
>>          struct ionic_desc_info *desc_info;
>> -       struct ionic_page_info *page_info;
>>          struct ionic_rxq_sg_desc *sg_desc;
>>          struct ionic_rxq_sg_elem *sg_elem;
>> +       struct ionic_buf_info *buf_info;
>>          struct ionic_rxq_desc *desc;
>> +       unsigned int max_sg_elems;
>>          unsigned int remain_len;
>> -       unsigned int seg_len;
>> +       unsigned int frag_len;
>>          unsigned int nfrags;
>>          unsigned int i, j;
>>          unsigned int len;
>>
>>          len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
>> -       nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
>> +       max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
>>
>>          for (i = ionic_q_space_avail(q); i; i--) {
>> +               nfrags = 0;
>>                  remain_len = len;
>>                  desc_info = &q->info[q->head_idx];
>>                  desc = desc_info->desc;
>> -               sg_desc = desc_info->sg_desc;
>> -               page_info = &desc_info->pages[0];
>> +               buf_info = &desc_info->bufs[0];
>>
>> -               if (page_info->page) { /* recycle the buffer */
>> -                       ionic_rxq_post(q, false, ionic_rx_clean, NULL);
>> -                       continue;
>> -               }
>> -
>> -               /* fill main descriptor - pages[0] */
>> -               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
>> -                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
>> -               desc_info->npages = nfrags;
>> -               if (unlikely(ionic_rx_page_alloc(q, page_info))) {
>> -                       desc->addr = 0;
>> -                       desc->len = 0;
>> -                       return;
>> +               if (!buf_info->page) { /* alloc a new buffer? */
>> +                       if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
>> +                               desc->addr = 0;
>> +                               desc->len = 0;
>> +                               return;
>> +                       }
>>                  }
>> -               desc->addr = cpu_to_le64(page_info->dma_addr);
>> -               seg_len = min_t(unsigned int, PAGE_SIZE, len);
>> -               desc->len = cpu_to_le16(seg_len);
>> -               remain_len -= seg_len;
>> -               page_info++;
>>
>> -               /* fill sg descriptors - pages[1..n] */
>> -               for (j = 0; j < nfrags - 1; j++) {
>> -                       if (page_info->page) /* recycle the sg buffer */
>> -                               continue;
>> +               /* fill main descriptor - buf[0] */
>> +               desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
>> +               frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
>> +               desc->len = cpu_to_le16(frag_len);
>> +               remain_len -= frag_len;
>> +               buf_info++;
>> +               nfrags++;
>>
>> +               /* fill sg descriptors - buf[1..n] */
>> +               sg_desc = desc_info->sg_desc;
>> +               for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
>>                          sg_elem = &sg_desc->elems[j];
>> -                       if (unlikely(ionic_rx_page_alloc(q, page_info))) {
>> -                               sg_elem->addr = 0;
>> -                               sg_elem->len = 0;
>> -                               return;
>> +                       if (!buf_info->page) { /* alloc a new sg buffer? */
>> +                               if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
>> +                                       sg_elem->addr = 0;
>> +                                       sg_elem->len = 0;
>> +                                       return;
>> +                               }
>>                          }
>> -                       sg_elem->addr = cpu_to_le64(page_info->dma_addr);
>> -                       seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
>> -                       sg_elem->len = cpu_to_le16(seg_len);
>> -                       remain_len -= seg_len;
>> -                       page_info++;
>> +
>> +                       sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
>> +                       frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
>> +                       sg_elem->len = cpu_to_le16(frag_len);
>> +                       remain_len -= frag_len;
>> +                       buf_info++;
>> +                       nfrags++;
>>                  }
>>
>> +               /* clear end sg element as a sentinel */
>> +               if (j < max_sg_elems) {
>> +                       sg_elem = &sg_desc->elems[j];
>> +                       memset(sg_elem, 0, sizeof(*sg_elem));
>> +               }
>> +
>> +               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
>> +                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
>> +               desc_info->nbufs = nfrags;
>> +
>>                  ionic_rxq_post(q, false, ionic_rx_clean, NULL);
>>          }
>>
>> @@ -395,21 +435,24 @@ void ionic_rx_fill(struct ionic_queue *q)
>>   void ionic_rx_empty(struct ionic_queue *q)
>>   {
>>          struct ionic_desc_info *desc_info;
>> -       struct ionic_page_info *page_info;
>> +       struct ionic_buf_info *buf_info;
>>          unsigned int i, j;
>>
>>          for (i = 0; i < q->num_descs; i++) {
>>                  desc_info = &q->info[i];
>>                  for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
>> -                       page_info = &desc_info->pages[j];
>> -                       if (page_info->page)
>> -                               ionic_rx_page_free(q, page_info);
>> +                       buf_info = &desc_info->bufs[j];
>> +                       if (buf_info->page)
>> +                               ionic_rx_page_free(q, buf_info);
>>                  }
>>
>> -               desc_info->npages = 0;
>> +               desc_info->nbufs = 0;
>>                  desc_info->cb = NULL;
>>                  desc_info->cb_arg = NULL;
>>          }
>> +
>> +       q->head_idx = 0;
>> +       q->tail_idx = 0;
>>   }
>>
>>   static void ionic_dim_update(struct ionic_qcq *qcq)
>> --
>> 2.17.1
>>
