lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 01 Jul 2024 16:10:19 -0700
From: Alexander H Duyck <alexander.duyck@...il.com>
To: Yunsheng Lin <linyunsheng@...wei.com>, davem@...emloft.net,
 kuba@...nel.org,  pabeni@...hat.com
Cc: netdev@...r.kernel.org, linux-kernel@...r.kernel.org, David Howells
	 <dhowells@...hat.com>, Andrew Morton <akpm@...ux-foundation.org>, 
	linux-mm@...ck.org
Subject: Re: [PATCH net-next v9 02/13] mm: move the page fragment allocator
 from page_alloc into its own file

On Tue, 2024-06-25 at 21:52 +0800, Yunsheng Lin wrote:
> Inspired by [1], move the page fragment allocator from page_alloc
> into its own c file and header file, as we are about to make more
> change for it to replace another page_frag implementation in
> sock.c
> 
> 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/
> 
> CC: David Howells <dhowells@...hat.com>
> CC: Alexander Duyck <alexander.duyck@...il.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>

So one thing that I think might have been overlooked in the previous
reviews is the fact that the headers weren't necessarily self
sufficient. You were introducing dependencies that had to be fulfilled
by other headers.

One thing you might try doing as part of your testing would be to add a
C file that just adds your header and calls your functions to verify
that there aren't any unincluded dependencies.

> ---
>  include/linux/gfp.h             |  22 -----
>  include/linux/mm_types.h        |  18 ----
>  include/linux/page_frag_cache.h |  47 +++++++++++
>  include/linux/skbuff.h          |   1 +
>  mm/Makefile                     |   1 +
>  mm/page_alloc.c                 | 136 ------------------------------
>  mm/page_frag_cache.c            | 144 ++++++++++++++++++++++++++++++++
>  mm/page_frag_test.c             |   1 +
>  8 files changed, 194 insertions(+), 176 deletions(-)
>  create mode 100644 include/linux/page_frag_cache.h
>  create mode 100644 mm/page_frag_cache.c
> 
...

> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> new file mode 100644
> index 000000000000..3a44bfc99750
> --- /dev/null
> +++ b/include/linux/page_frag_cache.h
> @@ -0,0 +1,47 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _LINUX_PAGE_FRAG_CACHE_H
> +#define _LINUX_PAGE_FRAG_CACHE_H
> +
> +#include <linux/gfp_types.h>
> +

The gfp_types.h only really gives you the values you pass to the
gfp_mask. Did you mean to include linux/types.h to get the gfp_t
typedef?

> +#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)

You should probably include linux/align.h to pull in the __ALIGN_MASK.

> +#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)

I am pretty sure get_order is from asm/page.h as well.

> +
> +struct page_frag_cache {
> +	void *va;
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)

I am pretty sure PAGE_SIZE is included from asm/page.h

> +	__u16 offset;
> +	__u16 size;
> +#else
> +	__u32 offset;
> +#endif
> +	/* we maintain a pagecount bias, so that we dont dirty cache line
> +	 * containing page->_refcount every time we allocate a fragment.
> +	 */
> +	unsigned int		pagecnt_bias;
> +	bool pfmemalloc;
> +};
> +
> +void page_frag_cache_drain(struct page_frag_cache *nc);
> +void __page_frag_cache_drain(struct page *page, unsigned int count);
> +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
> +			      gfp_t gfp_mask, unsigned int align_mask);
> +
> +static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
> +					  unsigned int fragsz, gfp_t gfp_mask,
> +					  unsigned int align)
> +{
> +	WARN_ON_ONCE(!is_power_of_2(align));

To get is_power_of_2 you should be including linux/log2.h.

> +	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
> +}
> +
> +static inline void *page_frag_alloc(struct page_frag_cache *nc,
> +				    unsigned int fragsz, gfp_t gfp_mask)
> +{
> +	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
> +}
> +
> +void page_frag_free(void *addr);
> +
> +#endif
> 

...

> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> new file mode 100644
> index 000000000000..88f567ef0e29
> --- /dev/null
> +++ b/mm/page_frag_cache.c
> @@ -0,0 +1,144 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Page fragment allocator
> + *
> + * Page Fragment:
> + *  An arbitrary-length arbitrary-offset area of memory which resides within a
> + *  0 or higher order page.  Multiple fragments within that page are
> + *  individually refcounted, in the page's reference counter.
> + *
> + * The page_frag functions provide a simple allocation framework for page
> + * fragments.  This is used by the network stack and network device drivers to
> + * provide a backing region of memory for use as either an sk_buff->head, or to
> + * be used in the "frags" portion of skb_shared_info.
> + */
> +
> +#include <linux/export.h>
> +#include <linux/init.h>
> +#include <linux/mm.h>
> +#include <linux/page_frag_cache.h>
> +#include "internal.h"

You could probably include gfp_types.h here since this is where you are
using the GFP_XXX values.

> +
> +static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
> +					     gfp_t gfp_mask)
> +{
> +	struct page *page = NULL;
> +	gfp_t gfp = gfp_mask;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
> +		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
> +	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
> +				PAGE_FRAG_CACHE_MAX_ORDER);
> +	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
> +#endif
> +	if (unlikely(!page))
> +		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
> +
> +	nc->va = page ? page_address(page) : NULL;
> +
> +	return page;
> +}
> +
> +void page_frag_cache_drain(struct page_frag_cache *nc)
> +{
> +	if (!nc->va)
> +		return;
> +
> +	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
> +	nc->va = NULL;
> +}
> +EXPORT_SYMBOL(page_frag_cache_drain);
> +
> +void __page_frag_cache_drain(struct page *page, unsigned int count)
> +{
> +	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
> +
> +	if (page_ref_sub_and_test(page, count))
> +		free_unref_page(page, compound_order(page));
> +}
> +EXPORT_SYMBOL(__page_frag_cache_drain);
> +
> +void *__page_frag_alloc_align(struct page_frag_cache *nc,
> +			      unsigned int fragsz, gfp_t gfp_mask,
> +			      unsigned int align_mask)
> +{
> +	unsigned int size = PAGE_SIZE;
> +	struct page *page;
> +	int offset;
> +
> +	if (unlikely(!nc->va)) {
> +refill:
> +		page = __page_frag_cache_refill(nc, gfp_mask);
> +		if (!page)
> +			return NULL;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +		/* if size can vary use size else just use PAGE_SIZE */
> +		size = nc->size;
> +#endif
> +		/* Even if we own the page, we do not use atomic_set().
> +		 * This would break get_page_unless_zero() users.
> +		 */
> +		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
> +
> +		/* reset page count bias and offset to start of new frag */
> +		nc->pfmemalloc = page_is_pfmemalloc(page);
> +		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> +		nc->offset = size;
> +	}
> +
> +	offset = nc->offset - fragsz;
> +	if (unlikely(offset < 0)) {
> +		page = virt_to_page(nc->va);
> +
> +		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> +			goto refill;
> +
> +		if (unlikely(nc->pfmemalloc)) {
> +			free_unref_page(page, compound_order(page));
> +			goto refill;
> +		}
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +		/* if size can vary use size else just use PAGE_SIZE */
> +		size = nc->size;
> +#endif
> +		/* OK, page count is 0, we can safely set it */
> +		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
> +
> +		/* reset page count bias and offset to start of new frag */
> +		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> +		offset = size - fragsz;
> +		if (unlikely(offset < 0)) {
> +			/*
> +			 * The caller is trying to allocate a fragment
> +			 * with fragsz > PAGE_SIZE but the cache isn't big
> +			 * enough to satisfy the request, this may
> +			 * happen in low memory conditions.
> +			 * We don't release the cache page because
> +			 * it could make memory pressure worse
> +			 * so we simply return NULL here.
> +			 */
> +			return NULL;
> +		}
> +	}
> +
> +	nc->pagecnt_bias--;
> +	offset &= align_mask;
> +	nc->offset = offset;
> +
> +	return nc->va + offset;
> +}
> +EXPORT_SYMBOL(__page_frag_alloc_align);
> +
> +/*
> + * Frees a page fragment allocated out of either a compound or order 0 page.
> + */
> +void page_frag_free(void *addr)
> +{
> +	struct page *page = virt_to_head_page(addr);
> +
> +	if (unlikely(put_page_testzero(page)))
> +		free_unref_page(page, compound_order(page));
> +}
> +EXPORT_SYMBOL(page_frag_free);
> diff --git a/mm/page_frag_test.c b/mm/page_frag_test.c
> index 5ee3f33b756d..07748ee0a21f 100644
> --- a/mm/page_frag_test.c
> +++ b/mm/page_frag_test.c
> @@ -16,6 +16,7 @@
>  #include <linux/log2.h>
>  #include <linux/completion.h>
>  #include <linux/kthread.h>
> +#include <linux/page_frag_cache.h>
>  
>  #define OBJPOOL_NR_OBJECT_MAX	BIT(24)
>  


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ