[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86c56c9a88d07efbfe1e85bec678e86704588a15.camel@gmail.com>
Date: Mon, 01 Jul 2024 16:10:19 -0700
From: Alexander H Duyck <alexander.duyck@...il.com>
To: Yunsheng Lin <linyunsheng@...wei.com>, davem@...emloft.net,
kuba@...nel.org, pabeni@...hat.com
Cc: netdev@...r.kernel.org, linux-kernel@...r.kernel.org, David Howells
<dhowells@...hat.com>, Andrew Morton <akpm@...ux-foundation.org>,
linux-mm@...ck.org
Subject: Re: [PATCH net-next v9 02/13] mm: move the page fragment allocator
from page_alloc into its own file
On Tue, 2024-06-25 at 21:52 +0800, Yunsheng Lin wrote:
> Inspired by [1], move the page fragment allocator from page_alloc
> into its own c file and header file, as we are about to make more
> change for it to replace another page_frag implementation in
> sock.c
>
> 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/
>
> CC: David Howells <dhowells@...hat.com>
> CC: Alexander Duyck <alexander.duyck@...il.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
One thing that I think might have been overlooked in the previous
reviews is the fact that the headers weren't necessarily
self-sufficient. You were introducing dependencies that had to be
fulfilled by other headers.
One thing you might try doing as part of your testing would be to add a
C file that just includes your header and calls your functions to verify
that there aren't any unincluded dependencies.
> ---
> include/linux/gfp.h | 22 -----
> include/linux/mm_types.h | 18 ----
> include/linux/page_frag_cache.h | 47 +++++++++++
> include/linux/skbuff.h | 1 +
> mm/Makefile | 1 +
> mm/page_alloc.c | 136 ------------------------------
> mm/page_frag_cache.c | 144 ++++++++++++++++++++++++++++++++
> mm/page_frag_test.c | 1 +
> 8 files changed, 194 insertions(+), 176 deletions(-)
> create mode 100644 include/linux/page_frag_cache.h
> create mode 100644 mm/page_frag_cache.c
>
...
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> new file mode 100644
> index 000000000000..3a44bfc99750
> --- /dev/null
> +++ b/include/linux/page_frag_cache.h
> @@ -0,0 +1,47 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _LINUX_PAGE_FRAG_CACHE_H
> +#define _LINUX_PAGE_FRAG_CACHE_H
> +
> +#include <linux/gfp_types.h>
> +
The gfp_types.h header only really gives you the values you pass in as
the gfp_mask. Did you mean to include linux/types.h to get the gfp_t
typedef?
> +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
You should probably include linux/align.h to pull in the __ALIGN_MASK.
> +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
I am pretty sure get_order also needs asm/page.h to be included.
> +
> +struct page_frag_cache {
> + void *va;
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
I am pretty sure PAGE_SIZE comes from asm/page.h.
> + __u16 offset;
> + __u16 size;
> +#else
> + __u32 offset;
> +#endif
> + /* we maintain a pagecount bias, so that we dont dirty cache line
> + * containing page->_refcount every time we allocate a fragment.
> + */
> + unsigned int pagecnt_bias;
> + bool pfmemalloc;
> +};
> +
> +void page_frag_cache_drain(struct page_frag_cache *nc);
> +void __page_frag_cache_drain(struct page *page, unsigned int count);
> +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
> + gfp_t gfp_mask, unsigned int align_mask);
> +
> +static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
> + unsigned int fragsz, gfp_t gfp_mask,
> + unsigned int align)
> +{
> + WARN_ON_ONCE(!is_power_of_2(align));
To get is_power_of_2 you should be including linux/log2.h.
> + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
> +}
> +
> +static inline void *page_frag_alloc(struct page_frag_cache *nc,
> + unsigned int fragsz, gfp_t gfp_mask)
> +{
> + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
> +}
> +
> +void page_frag_free(void *addr);
> +
> +#endif
>
...
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> new file mode 100644
> index 000000000000..88f567ef0e29
> --- /dev/null
> +++ b/mm/page_frag_cache.c
> @@ -0,0 +1,144 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Page fragment allocator
> + *
> + * Page Fragment:
> + * An arbitrary-length arbitrary-offset area of memory which resides within a
> + * 0 or higher order page. Multiple fragments within that page are
> + * individually refcounted, in the page's reference counter.
> + *
> + * The page_frag functions provide a simple allocation framework for page
> + * fragments. This is used by the network stack and network device drivers to
> + * provide a backing region of memory for use as either an sk_buff->head, or to
> + * be used in the "frags" portion of skb_shared_info.
> + */
> +
> +#include <linux/export.h>
> +#include <linux/init.h>
> +#include <linux/mm.h>
> +#include <linux/page_frag_cache.h>
> +#include "internal.h"
You could probably include gfp_types.h here since this is where you are
using the GFP_XXX values.
> +
> +static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
> + gfp_t gfp_mask)
> +{
> + struct page *page = NULL;
> + gfp_t gfp = gfp_mask;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> + gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
> + __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
> + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
> + PAGE_FRAG_CACHE_MAX_ORDER);
> + nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
> +#endif
> + if (unlikely(!page))
> + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
> +
> + nc->va = page ? page_address(page) : NULL;
> +
> + return page;
> +}
> +
> +void page_frag_cache_drain(struct page_frag_cache *nc)
> +{
> + if (!nc->va)
> + return;
> +
> + __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
> + nc->va = NULL;
> +}
> +EXPORT_SYMBOL(page_frag_cache_drain);
> +
> +void __page_frag_cache_drain(struct page *page, unsigned int count)
> +{
> + VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
> +
> + if (page_ref_sub_and_test(page, count))
> + free_unref_page(page, compound_order(page));
> +}
> +EXPORT_SYMBOL(__page_frag_cache_drain);
> +
> +void *__page_frag_alloc_align(struct page_frag_cache *nc,
> + unsigned int fragsz, gfp_t gfp_mask,
> + unsigned int align_mask)
> +{
> + unsigned int size = PAGE_SIZE;
> + struct page *page;
> + int offset;
> +
> + if (unlikely(!nc->va)) {
> +refill:
> + page = __page_frag_cache_refill(nc, gfp_mask);
> + if (!page)
> + return NULL;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> + /* if size can vary use size else just use PAGE_SIZE */
> + size = nc->size;
> +#endif
> + /* Even if we own the page, we do not use atomic_set().
> + * This would break get_page_unless_zero() users.
> + */
> + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
> +
> + /* reset page count bias and offset to start of new frag */
> + nc->pfmemalloc = page_is_pfmemalloc(page);
> + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> + nc->offset = size;
> + }
> +
> + offset = nc->offset - fragsz;
> + if (unlikely(offset < 0)) {
> + page = virt_to_page(nc->va);
> +
> + if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> + goto refill;
> +
> + if (unlikely(nc->pfmemalloc)) {
> + free_unref_page(page, compound_order(page));
> + goto refill;
> + }
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> + /* if size can vary use size else just use PAGE_SIZE */
> + size = nc->size;
> +#endif
> + /* OK, page count is 0, we can safely set it */
> + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
> +
> + /* reset page count bias and offset to start of new frag */
> + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> + offset = size - fragsz;
> + if (unlikely(offset < 0)) {
> + /*
> + * The caller is trying to allocate a fragment
> + * with fragsz > PAGE_SIZE but the cache isn't big
> + * enough to satisfy the request, this may
> + * happen in low memory conditions.
> + * We don't release the cache page because
> + * it could make memory pressure worse
> + * so we simply return NULL here.
> + */
> + return NULL;
> + }
> + }
> +
> + nc->pagecnt_bias--;
> + offset &= align_mask;
> + nc->offset = offset;
> +
> + return nc->va + offset;
> +}
> +EXPORT_SYMBOL(__page_frag_alloc_align);
> +
> +/*
> + * Frees a page fragment allocated out of either a compound or order 0 page.
> + */
> +void page_frag_free(void *addr)
> +{
> + struct page *page = virt_to_head_page(addr);
> +
> + if (unlikely(put_page_testzero(page)))
> + free_unref_page(page, compound_order(page));
> +}
> +EXPORT_SYMBOL(page_frag_free);
> diff --git a/mm/page_frag_test.c b/mm/page_frag_test.c
> index 5ee3f33b756d..07748ee0a21f 100644
> --- a/mm/page_frag_test.c
> +++ b/mm/page_frag_test.c
> @@ -16,6 +16,7 @@
> #include <linux/log2.h>
> #include <linux/completion.h>
> #include <linux/kthread.h>
> +#include <linux/page_frag_cache.h>
>
> #define OBJPOOL_NR_OBJECT_MAX BIT(24)
>
Powered by blists - more mailing lists