lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b3624f4.bde4.197360db3ea.Coremail.00107082@163.com>
Date: Tue, 3 Jun 2025 21:49:17 +0800 (CST)
From: "David Wang" <00107082@....com>
To: "Peter Zijlstra" <peterz@...radead.org>
Cc: mingo@...hat.com, acme@...nel.org, namhyung@...nel.org, mingo@...nel.org,
	yeoreum.yun@....com, leo.yan@....com,
	linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] perf/core: restore __perf_remove_from_context when
 DETACH_EXIT not set




At 2025-06-03 21:03:55, "David Wang" <00107082@....com> wrote:
>
>At 2025-06-03 20:54:40, "Peter Zijlstra" <peterz@...radead.org> wrote:
>>On Tue, Jun 03, 2025 at 02:50:56PM +0200, Peter Zijlstra wrote:
>>> On Tue, Jun 03, 2025 at 06:44:58PM +0800, David Wang wrote:
>>> 
>>> 
>>> > (As yeoreum.yun@....com pointed out,  the change in perf_remove_from_context() made
>>> > perf_event_set_state() happened before list_del_event(), resulting in perf_cgroup_event_disable()
>>> > not called.)
>>> 
>>> Aah, d'0h. Let me see what we should do there.
>>
>>Does this help? This way event_sched_out() will call
>>perf_cgroup_event_disable().
>>
>>
>>diff --git a/kernel/events/core.c b/kernel/events/core.c
>>index f34c99f8ce8f..adbb0372825f 100644
>>--- a/kernel/events/core.c
>>+++ b/kernel/events/core.c
>>@@ -2494,9 +2494,9 @@ __perf_remove_from_context(struct perf_event *event,
>> 	if (flags & DETACH_REVOKE)
>> 		state = PERF_EVENT_STATE_REVOKED;
>> 	if (flags & DETACH_DEAD) {
>>-		event->pending_disable = 1;
>> 		state = PERF_EVENT_STATE_DEAD;
>> 	}
>>+	event->pending_disable = 1;
>> 	event_sched_out(event, ctx);
>> 	perf_event_set_state(event, min(event->state, state));
>> 
>
>Ok, I will give it a try and update later.

Sadly no, caught a kernel panic at the first round....

I tried to use perf to reproduce this, but no luck so far. The following is the code I used to reproduce it.

(The code is silly, but valid I think....)
To reproduce, I use following steps:
Open two terminals:
1. In terminal A
mkdir /sys/fs/cgroup/mytest
echo $$ > /sys/fs/cgroup/mytest/cgroup.procs
2. In terminal B
[Compile the code below first if not done yet: g++ -o profiler xx.cpp]
./profiler mytest
3. Do something in terminal A; usually I run the following command under the kernel source tree
for i in {1..200}; do find ./ -name nottobefound > /dev/null; done
4. Wait for 5~10 minutes
5. In terminal B, ctrl-C stop the profiler
6. reboot
(On my system, with 6.15 at most 4 rounds of test would catch a kernel panic.)

I could not reproduce it with my KVM, maybe I need more trials.
Not sure whether anyone else could reproduce this. 


---
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <elf.h>
#include <string.h>

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
using namespace std;


// Page-count constant: each event is mmap'ed with a length of (1+MAXN)*psize bytes.
#define MAXN  512
// Capacity of the polls[] array and upper bound on the number of CPUs attached in main().
#define MAXCPU 128
// Report msg through perror() and terminate the process with exit status 1.
#define error(msg) do { perror(msg); exit(1); } while(0)

// Thin wrapper around the raw perf_event_open system call: every argument
// is forwarded unchanged and the value produced by syscall() is returned.
static long perf_event_open(struct perf_event_attr *perf_event,
		pid_t pid, int cpu, int group_fd, unsigned long flags) {
    const long rc = syscall(__NR_perf_event_open,
                            perf_event, pid, cpu, group_fd, flags);
    return rc;
}

// One poll entry per opened perf event fd; populated in main().
struct pollfd polls[MAXCPU];
// res for cleanup
// Page size in bytes, assigned from sysconf(_SC_PAGE_SIZE) in main().
static long long psize;
// Event fd -> (mmap base address, data_head value saved after the last drain).
map<int, pair<void*, long long>> res;
// Number of records visited by process_event(); printed by int_exit().
static long long eventc = 0;

// Tear-down routine, installed for SIGINT/SIGTERM and also called at the end
// of main(): unmaps every recorded buffer, closes its fd, empties the
// bookkeeping map, prints the event total and exits with status 0.
// The integer argument is not used.
void int_exit(int _) {
    const long long map_len = (1+MAXN)*psize;
    for (auto it = res.begin(); it != res.end(); ++it) {
        munmap(it->second.first, map_len);
        close(it->first);
    }
    res.clear();
    printf("total %lld events collect\n", eventc);
    exit(0);
}
// Visit one record in the buffer that starts at base.
//   base   - start of the data area
//   size   - length of the data area in bytes (used as the modulus)
//   offset - running byte position; reduced modulo size before use
// Increments the global event counter and returns the size field of the
// record header found at that position, so the caller can advance past it.
int process_event(char *base, unsigned long long size, unsigned long long offset) {
	const unsigned long long pos = offset % size;
	const struct perf_event_header *hdr =
		(const struct perf_event_header *)(base + pos);
	++eventc;
	return hdr->size;
}

int main(int argc, char *argv[]) {
	if (argc<2) { printf("Need cgroup name\n"); return 1; }
	char xb[256];
	snprintf(xb, sizeof(xb), "/sys/fs/cgroup/%s", argv[1]);
	int cgroup_id = open(xb, O_CLOEXEC);
	if (cgroup_id <= 0) error("error open cgroup dir");
	int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
	psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
	struct perf_event_attr attr;
	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_freq = 9999;//777; // adjust it
	attr.freq = 1;
	attr.wakeup_events = 16;
	attr.sample_type = PERF_SAMPLE_CALLCHAIN;
	attr.sample_max_stack = 32;
	attr.exclude_callchain_user = 1;
	// start perf event
	int i, k, fd;
	void* addr;
	for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
		printf("attaching cpu %d\n", i);
		fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
		if (fd<0) error("fail to open perf event");
		addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
		if (addr == MAP_FAILED) error("mmap failed");
		res[fd] = {addr, 0};
		polls[k].fd = fd;
		polls[k].events = POLLIN;
		polls[k].revents = 0;
		k++;
	}
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	unsigned long long head;
	int event_size;
	struct perf_event_mmap_page *mp;
	while (poll(polls, k, -1)>0) {
		for (i=0; i<k; i++) {
			if ((polls[i].revents&POLLIN)==0) continue;
			fd = polls[i].fd;
			addr = res[fd].first;
			mp = (struct perf_event_mmap_page *)addr;
			head = res[fd].second;
			ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
			if (head>mp->data_head) head=mp->data_head;
			head = mp->data_head-((mp->data_head-head)%mp->data_size);
			while(head<mp->data_head) {
				head += process_event((char*)addr+mp->data_offset, mp->data_size, head);
			}
			res[fd].second = mp->data_head;
			ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
		}
	}
	int_exit(0);
	return 0;
}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ