lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHS8izMjmj0DRT_vjzVq5HMQyXtZdVK=o4OP0gzbaN=aJdQ3ig@mail.gmail.com>
Date: Mon, 13 Nov 2023 05:42:16 -0800
From: Mina Almasry <almasrymina@...gle.com>
To: Yunsheng Lin <linyunsheng@...wei.com>
Cc: davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com, 
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org, 
	Willem de Bruijn <willemb@...gle.com>, Kaiyuan Zhang <kaiyuanz@...gle.com>, 
	Jesper Dangaard Brouer <hawk@...nel.org>, Ilias Apalodimas <ilias.apalodimas@...aro.org>, 
	Eric Dumazet <edumazet@...gle.com>
Subject: Re: [PATCH RFC 3/8] memory-provider: dmabuf devmem memory provider

On Mon, Nov 13, 2023 at 5:00 AM Yunsheng Lin <linyunsheng@...wei.com> wrote:
>
> From: Mina Almasry <almasrymina@...gle.com>
>
> Implement a memory provider that allocates dmabuf devmem page_pool_iovs.
>
> Support of PP_FLAG_DMA_MAP and PP_FLAG_DMA_SYNC_DEV is omitted for
> simplicity.
>
> The provider receives a reference to the struct netdev_dmabuf_binding
> via the pool->mp_priv pointer. The driver needs to set this pointer for
> the provider in the page_pool_params.
>
> The provider obtains a reference on the netdev_dmabuf_binding which
> guarantees the binding and the underlying mapping remain alive until
> the provider is destroyed.
>
> Signed-off-by: Willem de Bruijn <willemb@...gle.com>
> Signed-off-by: Kaiyuan Zhang <kaiyuanz@...gle.com>
> Signed-off-by: Mina Almasry <almasrymina@...gle.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
> ---
>  include/net/page_pool/types.h | 28 +++++++++++
>  net/core/page_pool.c          | 93 +++++++++++++++++++++++++++++++++++
>  2 files changed, 121 insertions(+)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 5e4fcd45ba50..52e4cf98ebc6 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -124,6 +124,7 @@ struct mem_provider;
>
>  enum pp_memory_provider_type {
>         __PP_MP_NONE, /* Use system allocator directly */
> +       PP_MP_DMABUF_DEVMEM, /* dmabuf devmem provider */
>  };
>
>  struct pp_memory_provider_ops {
> @@ -134,6 +135,33 @@ struct pp_memory_provider_ops {
>         void (*free_pages)(struct page_pool *pool, struct page *page);
>  };
>
> +extern const struct pp_memory_provider_ops dmabuf_devmem_ops;
> +
> +struct page_pool_iov {
> +       unsigned long res0;
> +       unsigned long pp_magic;
> +       struct page_pool *pp;
> +       struct page *page;  /* dmabuf memory provider specific field */
> +       unsigned long dma_addr;
> +       atomic_long_t pp_frag_count;
> +       unsigned int res1;
> +       refcount_t _refcount;
> +};
> +
> +#define PAGE_POOL_MATCH(pg, iov)                               \
> +       static_assert(offsetof(struct page, pg) ==              \
> +                     offsetof(struct page_pool_iov, iov))
> +PAGE_POOL_MATCH(flags, res0);
> +PAGE_POOL_MATCH(pp_magic, pp_magic);
> +PAGE_POOL_MATCH(pp, pp);
> +PAGE_POOL_MATCH(_pp_mapping_pad, page);
> +PAGE_POOL_MATCH(dma_addr, dma_addr);
> +PAGE_POOL_MATCH(pp_frag_count, pp_frag_count);
> +PAGE_POOL_MATCH(_mapcount, res1);
> +PAGE_POOL_MATCH(_refcount, _refcount);
> +#undef PAGE_POOL_MATCH
> +static_assert(sizeof(struct page_pool_iov) <= sizeof(struct page));
> +

You're doing exactly what I think you're doing, and what was nacked in RFC v1.

You've converted 'struct page_pool_iov' to essentially become a
duplicate of 'struct page'. You then cast page_pool_iov* to
struct page* in mp_dmabuf_devmem_alloc_pages(), and call mm APIs
like page_ref_*() on the page_pool_iov*, because you've fooled
the mm stack into thinking dma-buf memory is a struct page.

RFC v1 was almost exactly the same, except that it allocated
'struct page' directly, rather than defining a duplicate of struct
page, allocating that duplicate struct, and casting it to struct
page.

I don't think what you're doing here reverses the nacks I got in RFC
v1. You also did not CC any dma-buf or mm people on this proposal who
would bring up these concerns again.

>  struct page_pool {
>         struct page_pool_params p;
>
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 6c502bea842b..1bd7a2306f09 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -231,6 +231,9 @@ static int page_pool_init(struct page_pool *pool,
>         switch (pool->p.memory_provider) {
>         case __PP_MP_NONE:
>                 break;
> +       case PP_MP_DMABUF_DEVMEM:
> +               pool->mp_ops = &dmabuf_devmem_ops;
> +               break;
>         default:
>                 err = -EINVAL;
>                 goto free_ptr_ring;
> @@ -1010,3 +1013,93 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
>         }
>  }
>  EXPORT_SYMBOL(page_pool_update_nid);
> +
> +/*** "Dmabuf devmem memory provider" ***/
> +
> +static int mp_dmabuf_devmem_init(struct page_pool *pool)
> +{
> +       if (pool->p.flags & PP_FLAG_DMA_MAP ||
> +           pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> +               return -EOPNOTSUPP;
> +       return 0;
> +}
> +
> +static struct page *mp_dmabuf_devmem_alloc_pages(struct page_pool *pool,
> +                                                gfp_t gfp)
> +{
> +       struct page_pool_iov *ppiov;
> +       struct page *page;
> +       dma_addr_t dma;
> +
> +       ppiov = kvmalloc(sizeof(*ppiov), gfp | __GFP_ZERO);
> +       if (!ppiov)
> +               return NULL;
> +
> +       page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
> +       if (!page) {
> +               kvfree(ppiov);
> +               return NULL;
> +       }
> +
> +       dma = dma_map_page_attrs(pool->p.dev, page, 0,
> +                                (PAGE_SIZE << pool->p.order),
> +                                pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
> +                                                 DMA_ATTR_WEAK_ORDERING);
> +       if (dma_mapping_error(pool->p.dev, dma)) {
> +               put_page(page);
> +               kvfree(ppiov);
> +               return NULL;
> +       }
> +
> +       ppiov->pp = pool;
> +       ppiov->pp_magic = PP_SIGNATURE;
> +       ppiov->page = page;
> +       refcount_set(&ppiov->_refcount, 1);
> +       page_pool_fragment_page((struct page *)ppiov, 1);
> +       page_pool_set_dma_addr((struct page *)ppiov, dma);
> +       pool->pages_state_hold_cnt++;
> +       trace_page_pool_state_hold(pool, (struct page *)ppiov,
> +                                  pool->pages_state_hold_cnt);
> +       return (struct page *)ppiov;
> +}
> +
> +static void mp_dmabuf_devmem_destroy(struct page_pool *pool)
> +{
> +}
> +
> +static void mp_dmabuf_devmem_release_page(struct page_pool *pool,
> +                                         struct page *page)
> +{
> +       struct page_pool_iov *ppiov = (struct page_pool_iov *)page;
> +       dma_addr_t dma;
> +
> +       dma = page_pool_get_dma_addr(page);
> +
> +       /* When page is unmapped, it cannot be returned to our pool */
> +       dma_unmap_page_attrs(pool->p.dev, dma,
> +                            PAGE_SIZE << pool->p.order, pool->p.dma_dir,
> +                            DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
> +       page_pool_set_dma_addr(page, 0);
> +
> +       put_page(ppiov->page);
> +}
> +
> +static void mp_dmabuf_devmem_free_pages(struct page_pool *pool,
> +                                       struct page *page)
> +{
> +       int count;
> +
> +       count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
> +       trace_page_pool_state_release(pool, page, count);
> +
> +       kvfree(page);
> +}
> +
> +const struct pp_memory_provider_ops dmabuf_devmem_ops = {
> +       .init                   = mp_dmabuf_devmem_init,
> +       .destroy                = mp_dmabuf_devmem_destroy,
> +       .alloc_pages            = mp_dmabuf_devmem_alloc_pages,
> +       .release_page           = mp_dmabuf_devmem_release_page,
> +       .free_pages             = mp_dmabuf_devmem_free_pages,
> +};
> +EXPORT_SYMBOL(dmabuf_devmem_ops);
> --
> 2.33.0
>


-- 
Thanks,
Mina

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ