Message-Id: <20240827092013.1596-2-howardchu95@gmail.com>
Date: Tue, 27 Aug 2024 17:20:12 +0800
From: Howard Chu <howardchu95@...il.com>
To: acme@...nel.org
Cc: namhyung@...nel.org,
irogers@...gle.com,
jolsa@...nel.org,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org,
Howard Chu <howardchu95@...il.com>
Subject: [PATCH v1 1/2] perf trace: Collect data only for certain pids
Currently, we collect augmented data for __every__ process, even when the
-p option is used.

For instance, if we run perf trace -e write -p 11451, we don't just
collect the write buffer for process 11451, we collect write buffers for
all the processes running on the system. That data eventually goes to
waste, since we are not interested in it.
In this patch, I add a new BPF map, pids_allowed, so that augmented data
is collected only for the requested processes. It is the opposite of
pids_filtered, which lists the pids we do not want to trace, for example
perf trace itself.
Signed-off-by: Howard Chu <howardchu95@...il.com>
---
tools/perf/builtin-trace.c | 45 +++++++++++++++++++
.../bpf_skel/augmented_raw_syscalls.bpf.c | 23 ++++++++--
2 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 115f8dffb272..d38e0b919e8e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3911,6 +3911,44 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
return err;
}
+
+static int trace__set_allowed_pids(struct trace *trace)
+{
+ int err, pids_allowed_fd = bpf_map__fd(trace->skel->maps.pids_allowed);
+ bool exists = true;
+ struct str_node *pos;
+ struct strlist *pids_slist = strlist__new(trace->opts.target.pid, NULL);
+
+ trace->skel->bss->task_specific = false;
+
+ if (pids_slist) {
+ strlist__for_each_entry(pos, pids_slist) {
+ char *end_ptr;
+ int pid = strtol(pos->s, &end_ptr, 10);
+
+ if (pid == INT_MIN || pid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ continue;
+
+ err = bpf_map_update_elem(pids_allowed_fd, &pid, &exists, BPF_ANY);
+ if (err)
+ return err;
+
+ trace->skel->bss->task_specific = true;
+ }
+ }
+
+ if (workload_pid != -1) {
+ err = bpf_map_update_elem(pids_allowed_fd, &workload_pid, &exists, BPF_ANY);
+ if (err)
+ return err;
+
+ trace->skel->bss->task_specific = true;
+ }
+
+ strlist__delete(pids_slist);
+ return 0;
+}
#endif // HAVE_BPF_SKEL
static int trace__set_ev_qualifier_filter(struct trace *trace)
@@ -4305,6 +4343,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
#ifdef HAVE_BPF_SKEL
if (trace->skel && trace->skel->progs.sys_enter)
trace__init_syscalls_bpf_prog_array_maps(trace);
+
+ if (trace->skel) {
+ /* set up workload and user-specified pids for BPF */
+ err = trace__set_allowed_pids(trace);
+ if (err)
+ goto out_error_mem;
+ }
#endif
if (trace->ev_qualifier_ids.nr > 0) {
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index f29a8dfca044..1ab0a56c8f35 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -24,6 +24,8 @@
#define MAX_CPUS 4096
+volatile bool task_specific;
+
/* bpf-output associated map */
struct __augmented_syscalls__ {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -81,6 +83,13 @@ struct pids_filtered {
__uint(max_entries, 64);
} pids_filtered SEC(".maps");
+struct pids_allowed {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, pid_t);
+ __type(value, bool);
+ __uint(max_entries, 512);
+} pids_allowed SEC(".maps");
+
/*
* Desired design of maximum size and alignment (see RFC2553)
*/
@@ -393,9 +402,15 @@ static pid_t getpid(void)
return bpf_get_current_pid_tgid();
}
-static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
+static inline bool should_filter(void)
{
- return bpf_map_lookup_elem(pids, &pid) != NULL;
+ pid_t pid = getpid();
+
+ if (bpf_map_lookup_elem(&pids_filtered, &pid) ||
+ (task_specific && !bpf_map_lookup_elem(&pids_allowed, &pid)))
+ return true;
+
+ return false;
}
static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
@@ -497,7 +512,7 @@ int sys_enter(struct syscall_enter_args *args)
* initial, non-augmented raw_syscalls:sys_enter payload.
*/
- if (pid_filter__has(&pids_filtered, getpid()))
+ if (should_filter())
return 0;
augmented_args = augmented_args_payload();
@@ -523,7 +538,7 @@ int sys_exit(struct syscall_exit_args *args)
{
struct syscall_exit_args exit_args;
- if (pid_filter__has(&pids_filtered, getpid()))
+ if (should_filter())
return 0;
bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
--
2.46.0