Message-Id: <20260129084757.7e3747d0429d8231c26faa7b@kernel.org>
Date: Thu, 29 Jan 2026 08:47:57 +0900
From: Masami Hiramatsu (Google) <mhiramat@...nel.org>
To: Steven Rostedt <rostedt@...dmis.org>
Cc: LKML <linux-kernel@...r.kernel.org>, Linux Trace Kernel
 <linux-trace-kernel@...r.kernel.org>, Masami Hiramatsu
 <mhiramat@...nel.org>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
 Linus Torvalds <torvalds@...ux-foundation.org>
Subject: Re: [PATCH v2] ring-buffer: Add helper functions for allocations

On Tue, 25 Nov 2025 12:11:53 -0500
Steven Rostedt <rostedt@...dmis.org> wrote:

> From: Steven Rostedt <rostedt@...dmis.org>
> 
> The allocation of the per CPU buffer descriptor, the buffer page
> descriptors and the buffer page data itself can be pretty ugly:
> 
>   kzalloc_node(ALIGN(sizeof(struct buffer_page), cache_line_size()),
>                GFP_KERNEL, cpu_to_node(cpu));
> 
> And the data pages:
> 
>   page = alloc_pages_node(cpu_to_node(cpu),
>                           GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO, order);
>   if (!page)
> 	return NULL;
>   bpage->page = page_address(page);
>   rb_init_page(bpage->page);
> 
> Add helper functions to make the code easier to read.
> 
> This does make all allocations of the data page (bpage->page) use the
> __GFP_RETRY_MAYFAIL flag (and not just the bulk allocator), which is
> actually better: allocating data pages for ring buffer tracing should
> try hard but not trigger the OOM killer.
> 
> Link: https://lore.kernel.org/all/CAHk-=wjMMSAaqTjBSfYenfuzE1bMjLj+2DLtLWJuGt07UGCH_Q@mail.gmail.com/
> 
> Suggested-by: Linus Torvalds <torvalds@...ux-foundation.org>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>

Looks good to me.

Acked-by: Masami Hiramatsu (Google) <mhiramat@...nel.org>

Thanks,

> ---
> Changes since v1: https://patch.msgid.link/20251124140906.71a2abf6@gandalf.local.home
> 
> - Removed set but unused variables (kernel test robot)
> 
>  kernel/trace/ring_buffer.c | 97 +++++++++++++++++++++-----------------
>  1 file changed, 53 insertions(+), 44 deletions(-)
> 
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 1244d2c5c384..6295443b0944 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -401,6 +401,41 @@ static void free_buffer_page(struct buffer_page *bpage)
>  	kfree(bpage);
>  }
>  
> +/*
> + * For best performance, allocate cpu buffer data cache line sized
> + * and per CPU.
> + */
> +#define alloc_cpu_buffer(cpu) (struct ring_buffer_per_cpu *)		\
> +	kzalloc_node(ALIGN(sizeof(struct ring_buffer_per_cpu),		\
> +			   cache_line_size()), GFP_KERNEL, cpu_to_node(cpu));
> +
> +#define alloc_cpu_page(cpu) (struct buffer_page *)			\
> +	kzalloc_node(ALIGN(sizeof(struct buffer_page),			\
> +			   cache_line_size()), GFP_KERNEL, cpu_to_node(cpu));
> +
> +static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
> +{
> +	struct buffer_data_page *dpage;
> +	struct page *page;
> +	gfp_t mflags;
> +
> +	/*
> +	 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
> +	 * gracefully without invoking oom-killer and the system is not
> +	 * destabilized.
> +	 */
> +	mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO;
> +
> +	page = alloc_pages_node(cpu_to_node(cpu), mflags, order);
> +	if (!page)
> +		return NULL;
> +
> +	dpage = page_address(page);
> +	rb_init_page(dpage);
> +
> +	return dpage;
> +}
> +
>  /*
>   * We need to fit the time_stamp delta into 27 bits.
>   */
> @@ -2204,7 +2239,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>  	struct ring_buffer_cpu_meta *meta = NULL;
>  	struct buffer_page *bpage, *tmp;
>  	bool user_thread = current->mm != NULL;
> -	gfp_t mflags;
>  	long i;
>  
>  	/*
> @@ -2218,13 +2252,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>  	if (i < nr_pages)
>  		return -ENOMEM;
>  
> -	/*
> -	 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
> -	 * gracefully without invoking oom-killer and the system is not
> -	 * destabilized.
> -	 */
> -	mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
> -
>  	/*
>  	 * If a user thread allocates too much, and si_mem_available()
>  	 * reports there's enough memory, even though there is not.
> @@ -2241,10 +2268,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>  		meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
>  
>  	for (i = 0; i < nr_pages; i++) {
> -		struct page *page;
>  
> -		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
> -				    mflags, cpu_to_node(cpu_buffer->cpu));
> +		bpage = alloc_cpu_page(cpu_buffer->cpu);
>  		if (!bpage)
>  			goto free_pages;
>  
> @@ -2267,13 +2292,10 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>  			bpage->range = 1;
>  			bpage->id = i + 1;
>  		} else {
> -			page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
> -						mflags | __GFP_COMP | __GFP_ZERO,
> -						cpu_buffer->buffer->subbuf_order);
> -			if (!page)
> +			int order = cpu_buffer->buffer->subbuf_order;
> +			bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
> +			if (!bpage->page)
>  				goto free_pages;
> -			bpage->page = page_address(page);
> -			rb_init_page(bpage->page);
>  		}
>  		bpage->order = cpu_buffer->buffer->subbuf_order;
>  
> @@ -2324,14 +2346,12 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>  static struct ring_buffer_per_cpu *
>  rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>  {
> -	struct ring_buffer_per_cpu *cpu_buffer __free(kfree) = NULL;
> +	struct ring_buffer_per_cpu *cpu_buffer __free(kfree) =
> +		alloc_cpu_buffer(cpu);
>  	struct ring_buffer_cpu_meta *meta;
>  	struct buffer_page *bpage;
> -	struct page *page;
>  	int ret;
>  
> -	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
> -				  GFP_KERNEL, cpu_to_node(cpu));
>  	if (!cpu_buffer)
>  		return NULL;
>  
> @@ -2347,8 +2367,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>  	init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
>  	mutex_init(&cpu_buffer->mapping_lock);
>  
> -	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
> -			    GFP_KERNEL, cpu_to_node(cpu));
> +	bpage = alloc_cpu_page(cpu);
>  	if (!bpage)
>  		return NULL;
>  
> @@ -2370,13 +2389,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>  			rb_meta_buffer_update(cpu_buffer, bpage);
>  		bpage->range = 1;
>  	} else {
> -		page = alloc_pages_node(cpu_to_node(cpu),
> -					GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
> -					cpu_buffer->buffer->subbuf_order);
> -		if (!page)
> +		int order = cpu_buffer->buffer->subbuf_order;
> +		bpage->page = alloc_cpu_data(cpu, order);
> +		if (!bpage->page)
>  			goto fail_free_reader;
> -		bpage->page = page_address(page);
> -		rb_init_page(bpage->page);
>  	}
>  
>  	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
> @@ -6464,7 +6480,6 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
>  	struct ring_buffer_per_cpu *cpu_buffer;
>  	struct buffer_data_read_page *bpage = NULL;
>  	unsigned long flags;
> -	struct page *page;
>  
>  	if (!cpumask_test_cpu(cpu, buffer->cpumask))
>  		return ERR_PTR(-ENODEV);
> @@ -6486,22 +6501,16 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
>  	arch_spin_unlock(&cpu_buffer->lock);
>  	local_irq_restore(flags);
>  
> -	if (bpage->data)
> -		goto out;
> -
> -	page = alloc_pages_node(cpu_to_node(cpu),
> -				GFP_KERNEL | __GFP_NORETRY | __GFP_COMP | __GFP_ZERO,
> -				cpu_buffer->buffer->subbuf_order);
> -	if (!page) {
> -		kfree(bpage);
> -		return ERR_PTR(-ENOMEM);
> +	if (bpage->data) {
> +		rb_init_page(bpage->data);
> +	} else {
> +		bpage->data = alloc_cpu_data(cpu, cpu_buffer->buffer->subbuf_order);
> +		if (!bpage->data) {
> +			kfree(bpage);
> +			return ERR_PTR(-ENOMEM);
> +		}
>  	}
>  
> -	bpage->data = page_address(page);
> -
> - out:
> -	rb_init_page(bpage->data);
> -
>  	return bpage;
>  }
>  EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
> -- 
> 2.51.0
> 


-- 
Masami Hiramatsu (Google) <mhiramat@...nel.org>
