Message-ID: <5699d031-d6d2-9cae-7025-2589eb60c0a0@huawei.com>
Date: Sat, 30 Mar 2024 20:41:24 +0800
From: Yunsheng Lin <linyunsheng@...wei.com>
To: Alexander Lobakin <aleksander.lobakin@...el.com>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
<kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>
CC: Lorenzo Bianconi <lorenzo@...nel.org>,
Toke Høiland-Jørgensen <toke@...hat.com>,
<nex.sw.ncis.osdt.itp.upstreaming@...el.com>, <netdev@...r.kernel.org>,
<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH net-next 1/2] page_pool: check for PP direct cache
locality later
On 2024/3/30 0:55, Alexander Lobakin wrote:
> Since we have pool->p.napi (Jakub) and pool->cpuid (Lorenzo) to check
> whether it's safe to use direct recycling, we can use both globally for
> each page instead of relying solely on the @allow_direct argument.
> Let's assume that @allow_direct means "I'm sure it's local, don't waste
> time rechecking this" and, when it's false, try the mentioned params to
> still recycle the page directly. If neither is true, we'll lose some
> CPU cycles, but then it surely won't be the hotpath. On the other hand,
> paths where it's possible to use the direct cache, but not possible to
> safely set @allow_direct, will benefit from this move.
> The whole propagation of @napi_safe through a dozen skb freeing
> functions can now go away, which saves us some stack space.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> ---
> include/linux/skbuff.h | 12 ++++----
> net/core/page_pool.c | 31 +++++++++++++++++--
> net/core/skbuff.c | 70 +++++++++++++-----------------------------
> net/ipv4/esp4.c | 2 +-
> net/ipv6/esp6.c | 2 +-
> 5 files changed, 58 insertions(+), 59 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index dadd3f55d549..f7f6e42c6814 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3515,25 +3515,25 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
> unsigned int headroom);
> int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
> struct bpf_prog *prog);
> -bool napi_pp_put_page(struct page *page, bool napi_safe);
> +bool napi_pp_put_page(struct page *page);
>
> static inline void
> -skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe)
> +skb_page_unref(const struct sk_buff *skb, struct page *page)
> {
> #ifdef CONFIG_PAGE_POOL
> - if (skb->pp_recycle && napi_pp_put_page(page, napi_safe))
> + if (skb->pp_recycle && napi_pp_put_page(page))
> return;
> #endif
> put_page(page);
> }
>
> static inline void
> -napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
> +napi_frag_unref(skb_frag_t *frag, bool recycle)
> {
> struct page *page = skb_frag_page(frag);
>
> #ifdef CONFIG_PAGE_POOL
> - if (recycle && napi_pp_put_page(page, napi_safe))
> + if (recycle && napi_pp_put_page(page))
> return;
> #endif
> put_page(page);
> @@ -3549,7 +3549,7 @@ napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
> */
> static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
> {
> - napi_frag_unref(frag, recycle, false);
> + napi_frag_unref(frag, recycle);
> }
>
> /**
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index dd364d738c00..9d56257e444b 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -690,8 +690,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
> page_pool_dma_sync_for_device(pool, page,
> dma_sync_size);
>
> - if (allow_direct && in_softirq() &&
> - page_pool_recycle_in_cache(page, pool))
> + if (allow_direct && page_pool_recycle_in_cache(page, pool))
> return NULL;
>
> /* Page found as candidate for recycling */
> @@ -716,9 +715,35 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
> return NULL;
> }
>
> +static bool page_pool_napi_local(const struct page_pool *pool)
> +{
> + const struct napi_struct *napi;
> + u32 cpuid;
> +
> + if (unlikely(!in_softirq()))
> + return false;
> +
> + /* Allow direct recycle if we have reasons to believe that we are
> + * in the same context as the consumer would run, so there's
> + * no possible race.
> + * __page_pool_put_page() makes sure we're not in hardirq context
> + * and interrupts are enabled prior to accessing the cache.
> + */
> + cpuid = smp_processor_id();
> + if (READ_ONCE(pool->cpuid) == cpuid)
> + return true;
> +
> + napi = READ_ONCE(pool->p.napi);
> +
> + return napi && READ_ONCE(napi->list_owner) == cpuid;
> +}
> +
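Just to note the setup side: page_pool_napi_local() can only return
true for pools created with pp_params.napi set, or via
page_pool_create_percpu(). A hypothetical driver-side sketch, only for
illustration (the rx->* names and 'cpu' are made up, not from this
patch):

	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,
		.order		= 0,
		.pool_size	= 256,
		.nid		= NUMA_NO_NODE,
		.dev		= rx->dev,	/* device doing the DMA mapping */
		.dma_dir	= DMA_FROM_DEVICE,
		.napi		= &rx->napi,	/* makes the pool->p.napi check usable */
	};
	struct page_pool *pool = page_pool_create(&pp_params);

	/* or, for a pool serviced from one known CPU (e.g. the system
	 * percpu pools), so that the pool->cpuid check can match:
	 */
	pool = page_pool_create_percpu(&pp_params, cpu);

Either .napi or the cpuid passed to page_pool_create_percpu() is what
the function above compares against the current CPU.
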
> void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
> unsigned int dma_sync_size, bool allow_direct)
> {
> + if (!allow_direct)
It seems we are changing some semantics here: before this patch,
in_softirq() is checked even when allow_direct is true, while after
this patch it is no longer checked when allow_direct is true. I think
we need some assertion to ensure @allow_direct really means "I'm sure
it's local, don't waste time rechecking this". In my understanding,
this kind of problem is really hard to debug, which is why in_softirq()
was always checked. Perhaps add something like WARN_ONCE() or
DEBUG_NET_WARN_ON_ONCE() for the allow_direct == true case to catch
API misuse?
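A rough sketch of what I mean below; the placement in
page_pool_put_unrefed_page() and the exact condition are only
illustrative, not something from this patch:

	void page_pool_put_unrefed_page(struct page_pool *pool,
					struct page *page,
					unsigned int dma_sync_size,
					bool allow_direct)
	{
		if (!allow_direct)
			allow_direct = page_pool_napi_local(pool);
		else
			/* Callers passing allow_direct == true promise they
			 * run in the pool's softirq context; catch the ones
			 * that do not, instead of silently corrupting the
			 * lockless cache.
			 */
			DEBUG_NET_WARN_ON_ONCE(!in_softirq());

		...
	}
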
> + allow_direct = page_pool_napi_local(pool);
> +
> page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
> if (page && !page_pool_recycle_in_ring(pool, page)) {
> /* Cache full, fallback to free pages */
> @@ -969,7 +994,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
> static void page_pool_disable_direct_recycling(struct page_pool *pool)
> {
> /* Disable direct recycling based on pool->cpuid.
> - * Paired with READ_ONCE() in napi_pp_put_page().
> + * Paired with READ_ONCE() in page_pool_napi_local().
> */
> WRITE_ONCE(pool->cpuid, -1);
>