[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YuflNcUsyfQjculC@hyeyoo>
Date: Mon, 1 Aug 2022 23:37:41 +0900
From: Hyeonggon Yoo <42.hyeyoo@...il.com>
To: Vlastimil Babka <vbabka@...e.cz>
Cc: Christoph Lameter <cl@...ux.com>,
Pekka Enberg <penberg@...nel.org>,
David Rientjes <rientjes@...gle.com>,
Joonsoo Kim <iamjoonsoo.kim@....com>,
Andrew Morton <akpm@...ux-foundation.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Joe Perches <joe@...ches.com>,
Vasily Averin <vasily.averin@...ux.dev>,
Matthew Wilcox <willy@...radead.org>,
linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [PATCH v3 08/15] mm/slab_common: kmalloc_node: pass large
requests to page allocator
On Thu, Jul 28, 2022 at 06:09:27PM +0200, Vlastimil Babka wrote:
> On 7/12/22 15:39, Hyeonggon Yoo wrote:
> > Now that kmalloc_large_node() is in common code, pass large requests
> > to page allocator in kmalloc_node() using kmalloc_large_node().
> >
> > One problem is that currently there is no tracepoint in
> > kmalloc_large_node(). Instead of simply putting tracepoint in it,
> > use kmalloc_large_node{,_notrace} depending on its caller to show
> > useful address for both inlined kmalloc_node() and
> > __kmalloc_node_track_caller() when large objects are allocated.
> >
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@...il.com>
>
> Reviewed-by: Vlastimil Babka <vbabka@...e.cz>
>
Thanks!
> Nit below:
>
> > ---
> > v3:
> > This patch is new in v3 and this avoids
> > missing caller in __kmalloc_large_node_track_caller()
> > when kmalloc_large_node() is called.
> >
> > include/linux/slab.h | 26 +++++++++++++++++++-------
> > mm/slab.h | 2 ++
> > mm/slab_common.c | 17 ++++++++++++++++-
> > mm/slub.c | 2 +-
> > 4 files changed, 38 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 082499306098..fd2e129fc813 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -571,23 +571,35 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > return __kmalloc(size, flags);
> > }
> >
> > +#ifndef CONFIG_SLOB
> > static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> > {
> > -#ifndef CONFIG_SLOB
> > - if (__builtin_constant_p(size) &&
> > - size <= KMALLOC_MAX_CACHE_SIZE) {
> > - unsigned int i = kmalloc_index(size);
> > + if (__builtin_constant_p(size)) {
> > + unsigned int index;
> >
> > - if (!i)
> > + if (size > KMALLOC_MAX_CACHE_SIZE)
> > + return kmalloc_large_node(size, flags, node);
> > +
> > + index = kmalloc_index(size);
> > +
> > + if (!index)
> > return ZERO_SIZE_PTR;
> >
> > return kmem_cache_alloc_node_trace(
> > - kmalloc_caches[kmalloc_type(flags)][i],
> > + kmalloc_caches[kmalloc_type(flags)][index],
> > flags, node, size);
> > }
> > -#endif
> > return __kmalloc_node(size, flags, node);
> > }
> > +#else
> > +static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> > +{
> > + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> > + return kmalloc_large_node(size, flags, node);
> > +
> > + return __kmalloc_node(size, flags, node);
> > +}
> > +#endif
> >
> > /**
> > * kmalloc_array - allocate memory for an array.
> > diff --git a/mm/slab.h b/mm/slab.h
> > index a8d5eb1c323f..7cb51ff44f0c 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -273,6 +273,8 @@ void create_kmalloc_caches(slab_flags_t);
> >
> > /* Find the kmalloc slab corresponding for a certain size */
> > struct kmem_cache *kmalloc_slab(size_t, gfp_t);
> > +
> > +void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node);
> > #endif
> >
> > gfp_t kmalloc_fix_flags(gfp_t flags);
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 6f855587b635..dc872e0ef0fc 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -956,7 +956,8 @@ void *kmalloc_large(size_t size, gfp_t flags)
> > }
> > EXPORT_SYMBOL(kmalloc_large);
> >
> > -void *kmalloc_large_node(size_t size, gfp_t flags, int node)
> > +static __always_inline
>
> I don't think we need to inline, compiler should be able to make
> kmalloc_large_node(_notrace) quite efficient anyway.
Do you mean s/static __always_inline/static/g, or something like this?

	kmalloc_large_node_notrace():
		fold __kmalloc_large_node_notrace() into here

	kmalloc_large_node():
		kmalloc_large_node_notrace()
		trace_kmalloc()

I have no strong opinion.
IMO, it's unlikely that any workload would be meaningfully affected by
whether __kmalloc_large_node_notrace() is inlined or not.
I just wanted to avoid adding even a tiny bit of overhead with this series.
>
> > +void *__kmalloc_large_node_notrace(size_t size, gfp_t flags, int node)
> > {
> > struct page *page;
> > void *ptr = NULL;
> > @@ -976,6 +977,20 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node)
> >
> > return ptr;
> > }
> > +
> > +void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node)
> > +{
> > + return __kmalloc_large_node_notrace(size, flags, node);
> > +}
> > +
> > +void *kmalloc_large_node(size_t size, gfp_t flags, int node)
> > +{
> > + void *ret = __kmalloc_large_node_notrace(size, flags, node);
> > +
> > + trace_kmalloc_node(_RET_IP_, ret, NULL, size,
> > + PAGE_SIZE << get_order(size), flags, node);
> > + return ret;
> > +}
> > EXPORT_SYMBOL(kmalloc_large_node);
> >
> > #ifdef CONFIG_SLAB_FREELIST_RANDOM
> > diff --git a/mm/slub.c b/mm/slub.c
> > index f22a84dd27de..3d02cf44adf7 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -4401,7 +4401,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller
> > void *ret;
> >
> > if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
> > - ret = kmalloc_large_node(size, flags, node);
> > + ret = kmalloc_large_node_notrace(size, flags, node);
> >
> > trace_kmalloc_node(caller, ret, NULL,
> > size, PAGE_SIZE << get_order(size),
>
--
Thanks,
Hyeonggon
Powered by blists - more mailing lists