[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJuCfpFVdg5HkW03ZLFTyPHghW7cmYX=mA1TDajqRy8A4as42A@mail.gmail.com>
Date: Fri, 9 May 2025 10:34:52 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: David Wang <00107082@....com>
Cc: kent.overstreet@...ux.dev, akpm@...ux-foundation.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [PATCH 2/2] alloc_tag: keep codetag iterator cross read() calls
On Thu, May 8, 2025 at 10:53 PM David Wang <00107082@....com> wrote:
>
> When reading /proc/allocinfo, for each read syscall, seq_file would
> invoke the start/stop callbacks. In the start callback, memory is
> allocated to store the iterator, and the iterator walks from the
> beginning to the current read position.
>
> seq_file read() returns at most 4096 bytes per call, even when read
> with a larger user space buffer, meaning tens of read syscalls are
> needed to read out all of /proc/allocinfo. For example, a 306036-byte
> allocinfo file needs 76 reads:
>
> $ sudo cat /proc/allocinfo | wc
> 3964 16678 306036
> $ sudo strace -T -e read cat /proc/allocinfo
> ...
> read(3, " 4096 1 arch/x86/k"..., 131072) = 4063 <0.000062>
> ...
> read(3, " 0 0 sound/core"..., 131072) = 4021 <0.000150>
> ...
> For those n=3964 lines, each read takes about m=3964/76=52 lines;
> since the iterator restarts from the beginning for each read(),
> it would move forward
> m steps on 1st read
> 2*m steps on 2nd read
> 3*m steps on 3rd read
> ...
> n steps on last read
> As reads proceed, more iterator steps make read() calls slower and
> slower. Adding those up, the codetag iterator moves about O(n*n/m)
> steps, making data structure traversal take a significant part of
> the whole reading.
> Profiling when stress reading /proc/allocinfo confirms it:
>
> vfs_read(99.959% 1677299/1677995)
> proc_reg_read_iter(99.856% 1674881/1677299)
> seq_read_iter(99.959% 1674191/1674881)
> allocinfo_start(75.664% 1266755/1674191)
> codetag_next_ct(79.217% 1003487/1266755) <---
> srso_return_thunk(1.264% 16011/1266755)
> __kmalloc_cache_noprof(0.102% 1296/1266755)
> ...
> allocinfo_show(21.287% 356378/1674191)
> allocinfo_next(1.530% 25621/1674191)
> allocinfo_start() takes about 75% of seq_read().
>
> Private data allocated at open() time can be used to keep the
> iterator alive across read() calls, avoiding the memory allocation
> and iterator reset for each read(). This way, only O(1) memory
> allocation and O(n) iterator steps are needed, and `time` shows a
> performance improvement from ~7ms to ~4ms.
> Profiling with the change:
>
> vfs_read(99.865% 1581073/1583214)
> proc_reg_read_iter(99.485% 1572934/1581073)
> seq_read_iter(99.846% 1570519/1572934)
> allocinfo_show(87.428% 1373074/1570519)
> seq_buf_printf(83.695% 1149196/1373074)
> seq_buf_putc(1.917% 26321/1373074)
> _find_next_bit(1.531% 21023/1373074)
> ...
> codetag_to_text(0.490% 6727/1373074)
> ...
> allocinfo_next(6.275% 98543/1570519)
> ...
> allocinfo_start(0.369% 5790/1570519)
> ...
> allocinfo_start takes less than 1%.
>
> Signed-off-by: David Wang <00107082@....com>
I think you will be posting another version to address comments in the
first patch, but for this patch feel free to add:
Acked-by: Suren Baghdasaryan <surenb@...gle.com>
Thanks!
> ---
> lib/alloc_tag.c | 29 ++++++++++-------------------
> 1 file changed, 10 insertions(+), 19 deletions(-)
>
> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> index 25ecc1334b67..fdd5887769a6 100644
> --- a/lib/alloc_tag.c
> +++ b/lib/alloc_tag.c
> @@ -45,21 +45,16 @@ struct allocinfo_private {
> static void *allocinfo_start(struct seq_file *m, loff_t *pos)
> {
> struct allocinfo_private *priv;
> - struct codetag *ct;
> loff_t node = *pos;
>
> - priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> - m->private = priv;
> - if (!priv)
> - return NULL;
> -
> - priv->print_header = (node == 0);
> + priv = (struct allocinfo_private *)m->private;
> codetag_lock_module_list(alloc_tag_cttype, true);
> - priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
> - while ((ct = codetag_next_ct(&priv->iter)) != NULL && node)
> - node--;
> -
> - return ct ? priv : NULL;
> + if (node == 0) {
> + priv->print_header = true;
> + priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
> + codetag_next_ct(&priv->iter);
> + }
> + return priv->iter.ct ? priv : NULL;
> }
>
> static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
> @@ -76,12 +71,7 @@ static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
>
> static void allocinfo_stop(struct seq_file *m, void *arg)
> {
> - struct allocinfo_private *priv = (struct allocinfo_private *)m->private;
> -
> - if (priv) {
> - codetag_lock_module_list(alloc_tag_cttype, false);
> - kfree(priv);
> - }
> + codetag_lock_module_list(alloc_tag_cttype, false);
> }
>
> static void print_allocinfo_header(struct seq_buf *buf)
> @@ -249,7 +239,8 @@ static void __init procfs_init(void)
> if (!mem_profiling_support)
> return;
>
> - if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) {
> + if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
> + sizeof(struct allocinfo_private), NULL)) {
> pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
> shutdown_mem_profiling(false);
> }
> --
> 2.39.2
>
Powered by blists - more mailing lists