lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZfGudRl9-tB_TszO@x1>
Date: Wed, 13 Mar 2024 10:47:33 -0300
From: Arnaldo Carvalho de Melo <acme@...nel.org>
To: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Cc: linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
	Adrian Hunter <adrian.hunter@...el.com>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Ian Rogers <irogers@...gle.com>, Ingo Molnar <mingo@...hat.com>,
	Jiri Olsa <jolsa@...nel.org>, Marco Elver <elver@...gle.com>,
	Mark Rutland <mark.rutland@....com>,
	Namhyung Kim <namhyung@...nel.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [PATCH v2 0/4] perf: Make SIGTRAP and __perf_pending_irq() work
 on RT.

On Wed, Mar 13, 2024 at 10:28:44AM -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Mar 13, 2024 at 09:13:03AM +0100, Sebastian Andrzej Siewior wrote:
> > One part I don't get: did you let it run or did you kill it?
 
> If I let them run they will finish and exit, no exec_child remains.
 
> If I instead try to stop the loop that keeps forking 100 of them,
> then the exec_child processes remain spinning.
 
> > `exec_child' spins until a signal is received or the parent kills it. So
> > it shouldn't remain there forever. And my guess is that it is spinning
> > in userland and not in the kernel.
 
> Checking that now:

tl;dr: it is the tight loop; full details at the end.

100.00  b6:   mov    signal_count,%eax
              test   %eax,%eax
            ↑ je     b6     

remove_on_exec.c

/* For exec'd child. */
static void exec_child(void)
{
        struct sigaction action = {};
        const int val = 42;

        /* Set up sigtrap handler in case we erroneously receive a trap. */
        action.sa_flags = SA_SIGINFO | SA_NODEFER;
        action.sa_sigaction = sigtrap_handler;
        sigemptyset(&action.sa_mask);
        if (sigaction(SIGTRAP, &action, NULL))
                _exit((perror("sigaction failed"), 1));

        /* Signal parent that we're starting to spin. */
        if (write(STDOUT_FILENO, &val, sizeof(int)) == -1)
                _exit((perror("write failed"), 1));

        /* Should hang here until killed. */
        while (!signal_count);
}

So probably just a test needing to be a bit more polished?

Seems like it: on a newer, faster machine I managed to reproduce it on
a non-RT kernel, with one exec_child remaining:

  1.44  b6:   mov   signal_count,%eax
              test  %eax,%eax
 98.56      ↑ je    b6

same tight loop:

acme@x1:~/git/perf-tools-next/tools/testing/selftests/perf_events$ pidof exec_child
722300
acme@x1:~/git/perf-tools-next/tools/testing/selftests/perf_events$ ps ax|grep exec_child
 722300 pts/2    R      4:08 exec_child
 722502 pts/2    S+     0:00 grep --color=auto exec_child
acme@x1:~/git/perf-tools-next/tools/testing/selftests/perf_events$

- Arnaldo

[root@...e ~]# perf record --call-graph dwarf -p 35785
^C[ perf record: Woken up 48 times to write data ]
[ perf record: Captured and wrote 12.120 MB perf.data (1503 samples) ]

[root@...e ~]# ls -la perf.data
-rw-------. 1 root root 12720152 Mar 13 10:32 perf.data
[root@...e ~]#
[root@...e ~]# perf report --no-child --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 1K of event 'cycles:P'
# Event count (approx.): 926018718
#
# Overhead  Command  Shared Object      Symbol                                
# ........  .......  .................  ......................................
#
    98.48%  exe      remove_on_exec     [.] exec_child
            |
            ---exec_child
               main
               __libc_start_call_main
               __libc_start_main@@GLIBC_2.34
               _start

     0.33%  exe      [kernel.kallsyms]  [k] arch_scale_freq_tick
     0.13%  exe      [kernel.kallsyms]  [k] debug_smp_processor_id
     0.13%  exe      [kernel.kallsyms]  [k] check_cpu_stall
     0.13%  exe      [kernel.kallsyms]  [k] acct_account_cputime
     0.13%  exe      [kernel.kallsyms]  [k] cpuacct_account_field
     0.07%  exe      [kernel.kallsyms]  [k] preempt_count_add
     0.07%  exe      [kernel.kallsyms]  [k] update_irq_load_avg
     0.07%  exe      [kernel.kallsyms]  [k] cgroup_rstat_updated
     0.07%  exe      [kernel.kallsyms]  [k] rcu_sched_clock_irq
     0.07%  exe      [kernel.kallsyms]  [k] account_user_time
     0.07%  exe      [kernel.kallsyms]  [k] __hrtimer_run_queues
     0.07%  exe      [kernel.kallsyms]  [k] tick_nohz_highres_handler
     0.07%  exe      [kernel.kallsyms]  [k] ktime_get_update_offsets_now
     0.06%  exe      [kernel.kallsyms]  [k] __enqueue_entity
     0.06%  exe      [kernel.kallsyms]  [k] tick_sched_handle
     0.00%  exe      [kernel.kallsyms]  [k] __intel_pmu_enable_all.constprop.0


#
# (Tip: To show assembler sample contexts use perf record -b / perf script -F +brstackinsn --xed)
#
[root@...e ~]#

[root@...e ~]# perf annotate --stdio2 exec_child 
Samples: 1K of event 'cycles:P', 4000 Hz, Event count (approx.): 911943256, [percent: local period]
exec_child() /home/acme/git/linux/tools/testing/selftests/perf_events/remove_on_exec
Percent        
               
               
            Disassembly of section .text:
               
            00000000004045cf <exec_child>:
              push   %rbp   
              mov    %rsp,%rbp
              sub    $0xb0,%rsp
              lea    -0xa0(%rbp),%rdx
              mov    $0x0,%eax
              mov    $0x13,%ecx
              mov    %rdx,%rdi
              rep    stos %rax,%es:(%rdi)
              movl   $0x2a,-0xa4(%rbp)
              movl   $0x40000004,-0x18(%rbp)
              movq   $0x402a2e,-0xa0(%rbp)
              lea    -0xa0(%rbp),%rax
              add    $0x8,%rax
              mov    %rax,%rdi
            → callq  sigemptyset@plt
              lea    -0xa0(%rbp),%rax
              mov    $0x0,%edx
              mov    %rax,%rsi
              mov    $0x5,%edi
            → callq  sigaction@plt
              test   %eax,%eax
            ↓ je     82     
              mov    $0x4058af,%edi
            → callq  perror@plt
              mov    $0x1,%edi
            → callq  _exit@plt
        82:   lea    -0xa4(%rbp),%rax
              mov    $0x4,%edx
              mov    %rax,%rsi
              mov    $0x1,%edi
            → callq  write@plt
              cmp    $0xffffffffffffffff,%rax
            ↓ jne    b5     
              mov    $0x4058c0,%edi
            → callq  perror@plt
              mov    $0x1,%edi
            → callq  _exit@plt
        b5:   nop           
100.00  b6:   mov    signal_count,%eax
              test   %eax,%eax
            ↑ je     b6     
              nop           
              nop           
              leaveq        
            ← retq          
[root@...e ~]#

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ