Message-ID: <429f6303c9c61d50ba6c5fdddec30c22dc0b2c09.camel@redhat.com>
Date: Thu, 09 Nov 2023 10:01:45 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: Mina Almasry <almasrymina@...gle.com>, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-arch@...r.kernel.org,
linux-kselftest@...r.kernel.org, linux-media@...r.kernel.org,
dri-devel@...ts.freedesktop.org, linaro-mm-sig@...ts.linaro.org
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>,
Ilias Apalodimas <ilias.apalodimas@...aro.org>,
Arnd Bergmann <arnd@...db.de>,
David Ahern <dsahern@...nel.org>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Shuah Khan <shuah@...nel.org>,
Sumit Semwal <sumit.semwal@...aro.org>,
Christian König <christian.koenig@....com>,
Shakeel Butt <shakeelb@...gle.com>,
Jeroen de Borst <jeroendb@...gle.com>,
Praveen Kaligineedi <pkaligineedi@...gle.com>
Subject: Re: [RFC PATCH v3 07/12] page-pool: device memory support
On Sun, 2023-11-05 at 18:44 -0800, Mina Almasry wrote:
> Overload the LSB of struct page* to indicate that it's a page_pool_iov.
>
> Refactor mm calls on struct page* into helpers, and add page_pool_iov
> handling on those helpers. Modify callers of these mm APIs with calls to
> these helpers instead.
>
> In areas where struct page* is dereferenced, add a check for special
> handling of page_pool_iov.
>
> Signed-off-by: Mina Almasry <almasrymina@...gle.com>
>
> ---
> include/net/page_pool/helpers.h | 74 ++++++++++++++++++++++++++++++++-
> net/core/page_pool.c | 63 ++++++++++++++++++++--------
> 2 files changed, 118 insertions(+), 19 deletions(-)
>
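To recap the tagging scheme for other reviewers - this is my paraphrase
of the helpers introduced earlier in the series, so the exact tag bit
name may differ:

	static inline bool page_is_page_pool_iov(const struct page *page)
	{
		/* a real struct page pointer is always word-aligned,
		 * so the LSB is free to act as a type tag
		 */
		return (unsigned long)page & PP_IOV;
	}

	static inline struct page_pool_iov *page_to_page_pool_iov(struct page *page)
	{
		if (page_is_page_pool_iov(page))
			return (struct page_pool_iov *)((unsigned long)page & ~PP_IOV);

		DEBUG_NET_WARN_ON_ONCE(true);
		return NULL;
	}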
> diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
> index b93243c2a640..08f1a2cc70d2 100644
> --- a/include/net/page_pool/helpers.h
> +++ b/include/net/page_pool/helpers.h
> @@ -151,6 +151,64 @@ static inline struct page_pool_iov *page_to_page_pool_iov(struct page *page)
> return NULL;
> }
>
> +static inline int page_pool_page_ref_count(struct page *page)
> +{
> + if (page_is_page_pool_iov(page))
> + return page_pool_iov_refcount(page_to_page_pool_iov(page));
> +
> + return page_ref_count(page);
> +}
> +
> +static inline void page_pool_page_get_many(struct page *page,
> + unsigned int count)
> +{
> + if (page_is_page_pool_iov(page))
> + return page_pool_iov_get_many(page_to_page_pool_iov(page),
> + count);
> +
> + return page_ref_add(page, count);
> +}
> +
> +static inline void page_pool_page_put_many(struct page *page,
> + unsigned int count)
> +{
> + if (page_is_page_pool_iov(page))
> + return page_pool_iov_put_many(page_to_page_pool_iov(page),
> + count);
> +
> + if (count > 1)
> + page_ref_sub(page, count - 1);
> +
> + put_page(page);
> +}
> +
> +static inline bool page_pool_page_is_pfmemalloc(struct page *page)
> +{
> + if (page_is_page_pool_iov(page))
> + return false;
> +
> + return page_is_pfmemalloc(page);
> +}
> +
> +static inline bool page_pool_page_is_pref_nid(struct page *page, int pref_nid)
> +{
> + /* Assume page_pool_iovs are on the preferred node without actually
> + * checking...
> + *
> + * This check is only used to check for recycling memory in the page
> + * pool's fast paths. Currently the only implementation of page_pool_iov
> + * is dmabuf device memory. It's a deliberate decision by the user to
> + * bind a certain dmabuf to a certain netdev, and the netdev rx queue
> + * would not be able to reallocate memory from another dmabuf that
> + * exists on the preferred node, so this check doesn't make much sense
> + * in this case. Assume all page_pool_iovs can be recycled for now.
> + */
> + if (page_is_page_pool_iov(page))
> + return true;
> +
> + return page_to_nid(page) == pref_nid;
> +}
> +
> /**
> * page_pool_dev_alloc_pages() - allocate a page.
> * @pool: pool from which to allocate
> @@ -301,6 +359,9 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
> {
> long ret;
>
> + if (page_is_page_pool_iov(page))
> + return -EINVAL;
> +
> /* If nr == pp_frag_count then we have cleared all remaining
> * references to the page:
> * 1. 'n == 1': no need to actually overwrite it.
> @@ -431,7 +492,12 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
> */
> static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
> {
> - dma_addr_t ret = page->dma_addr;
> + dma_addr_t ret;
> +
> + if (page_is_page_pool_iov(page))
> + return page_pool_iov_dma_addr(page_to_page_pool_iov(page));
Should the above conditional be guarded by the page_pool_mem_providers
static key? This looks like a fast path. Same question for the refcount
helper above.

Minor nit: possibly cache the result of 'page_is_page_pool_iov(page)' to
make the code more readable.
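Something like the following is what I have in mind - a completely
untested sketch, and it assumes the page_pool_mem_providers static key
is made visible to this header:

	static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
	{
		dma_addr_t ret;

		/* pay for the iov check only when a memory provider
		 * can actually be in use
		 */
		if (static_branch_unlikely(&page_pool_mem_providers) &&
		    page_is_page_pool_iov(page))
			return page_pool_iov_dma_addr(page_to_page_pool_iov(page));

		ret = page->dma_addr;

		if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
			ret <<= PAGE_SHIFT;

		return ret;
	}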
> +
> + ret = page->dma_addr;
>
> if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
> ret <<= PAGE_SHIFT;
> @@ -441,6 +507,12 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>
> static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> {
> + /* page_pool_iovs are mapped and their dma-addr can't be modified. */
> + if (page_is_page_pool_iov(page)) {
> + DEBUG_NET_WARN_ON_ONCE(true);
> + return false;
> + }
Quickly skimming over the page_pool code, it looks like
page_pool_set_dma_addr() usage is guarded by the PP_FLAG_DMA_MAP page
pool flag. Could the device mem provider enforce that such a flag is
cleared on the page pool?
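Something along these lines in the provider init hook, perhaps -
untested, and the function name here is just a guess:

	static int mp_dmabuf_devmem_init(struct page_pool *pool)
	{
		/* dmabuf pages are already mapped and their dma-addr
		 * can't change; delegating mapping/sync to the page
		 * pool makes no sense for this provider
		 */
		if (pool->p.flags & (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV))
			return -EOPNOTSUPP;

		/* provider-specific setup goes here */
		return 0;
	}

That would also cover the DMA sync cases touched below.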
> +
> if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
> page->dma_addr = addr >> PAGE_SHIFT;
>
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 138ddea0b28f..d211996d423b 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -317,7 +317,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
> if (unlikely(!page))
> break;
>
> - if (likely(page_to_nid(page) == pref_nid)) {
> + if (likely(page_pool_page_is_pref_nid(page, pref_nid))) {
> pool->alloc.cache[pool->alloc.count++] = page;
> } else {
> /* NUMA mismatch;
> @@ -362,7 +362,15 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
> struct page *page,
> unsigned int dma_sync_size)
> {
> - dma_addr_t dma_addr = page_pool_get_dma_addr(page);
> + dma_addr_t dma_addr;
> +
> + /* page_pool_iov memory provider does not support PP_FLAG_DMA_SYNC_DEV */
> + if (page_is_page_pool_iov(page)) {
> + DEBUG_NET_WARN_ON_ONCE(true);
> + return;
> + }
Similar to the above point, mutatis mutandis.
> +
> + dma_addr = page_pool_get_dma_addr(page);
>
> dma_sync_size = min(dma_sync_size, pool->p.max_len);
> dma_sync_single_range_for_device(pool->p.dev, dma_addr,
> @@ -374,6 +382,12 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
> {
> dma_addr_t dma;
>
> + if (page_is_page_pool_iov(page)) {
> + /* page_pool_iovs are already mapped */
> + DEBUG_NET_WARN_ON_ONCE(true);
> + return true;
> + }
Ditto.
Cheers,
Paolo