[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241011021403.4089793-3-howardchu95@gmail.com>
Date: Thu, 10 Oct 2024 19:14:02 -0700
From: Howard Chu <howardchu95@...il.com>
To: peterz@...radead.org
Cc: mingo@...hat.com,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org,
james.clark@...aro.org,
alan.maguire@...cle.com,
Howard Chu <howardchu95@...il.com>
Subject: [PATCH v2 2/2] perf trace: Rewrite BPF code to pass the verifier
Rewrite the code to add more memory bound checking in order to pass the
BPF verifier, no logic is changed.
This rewrite is centered around two main ideas:
- Always use a variable instead of an expression in if block's condition,
so BPF verifier keeps track of the correct register.
- Delay the check as late as possible, just before the BPF function
call.
Things that can be done better still:
- Instead of allowing a theoretical maximum of a 6-argument augmentation
payload, reduce the payload to a smaller fixed size.
Signed-off-by: Howard Chu <howardchu95@...il.com>
---
.../bpf_skel/augmented_raw_syscalls.bpf.c | 122 ++++++++++--------
1 file changed, 67 insertions(+), 55 deletions(-)
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index b2f17cca014b..9ae459faac4b 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -157,9 +157,9 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar
return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
-static inline int augmented__beauty_output(void *ctx, void *data, int len)
+static inline int augmented__beauty_output(void *ctx, struct beauty_payload_enter *args, int len)
{
- return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
+ return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
static inline
@@ -277,25 +277,31 @@ int sys_enter_rename(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[0],
*newpath_arg = (const void *)args->args[1];
- unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len, aligned_size;
if (augmented_args == NULL)
- return 1; /* Failure: don't filter */
+ goto failure;
len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
- len += augmented_args->arg.size;
+ aligned_size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ augmented_args->arg.size = aligned_size;
+ len += aligned_size;
+
+ /* Every read from userspace is limited to value size */
+ if (aligned_size > sizeof(augmented_args->arg.value))
+ goto failure;
- struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + aligned_size;
newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
arg2->size = newpath_len;
-
len += newpath_len;
return augmented__output(args, augmented_args, len);
+failure:
+ return 1; /* Failure: don't filter */
}
SEC("tp/syscalls/sys_enter_renameat2")
@@ -304,25 +310,31 @@ int sys_enter_renameat2(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[1],
*newpath_arg = (const void *)args->args[3];
- unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len, aligned_size;
if (augmented_args == NULL)
- return 1; /* Failure: don't filter */
+ goto failure;
len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
- len += augmented_args->arg.size;
+ aligned_size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ augmented_args->arg.size = aligned_size;
+ len += aligned_size;
- struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+ /* Every read from userspace is limited to value size */
+ if (aligned_size > sizeof(augmented_args->arg.value))
+ goto failure;
+
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + aligned_size;
newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
arg2->size = newpath_len;
-
len += newpath_len;
return augmented__output(args, augmented_args, len);
+failure:
+ return 1; /* Failure: don't filter */
}
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -422,12 +434,12 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
{
- bool augmented, do_output = false;
- int zero = 0, size, aug_size, index, output = 0,
- value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+ bool do_augment = false;
+ int zero = 0, value_size = sizeof(struct augmented_arg) - sizeof(u64);
unsigned int nr, *beauty_map;
struct beauty_payload_enter *payload;
- void *arg, *payload_offset;
+ void *payload_offset, *value_offset;
+ u64 len = 0; /* has to be u64, otherwise it won't pass the verifier */
/* fall back to do predefined tail call */
if (args == NULL)
@@ -436,16 +448,18 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
/* use syscall number to get beauty_map entry */
nr = (__u32)args->syscall_nr;
beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr);
+ if (beauty_map == NULL)
+ return 1;
/* set up payload for output */
payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
payload_offset = (void *)&payload->aug_args;
-
- if (beauty_map == NULL || payload == NULL)
+ if (payload == NULL)
return 1;
/* copy the sys_enter header, which has the syscall_nr */
__builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
+ len += sizeof(struct syscall_enter_args);
/*
* Determine what type of argument and how many bytes to read from user space, using the
@@ -457,52 +471,50 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
* buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
*/
for (int i = 0; i < 6; i++) {
- arg = (void *)args->args[i];
- augmented = false;
- size = beauty_map[i];
- aug_size = size; /* size of the augmented data read from user space */
+ int augment_size = beauty_map[i];
+ unsigned int augment_size_with_header;
+ void *addr = (void *)args->args[i];
+ bool is_augmented = false;
- if (size == 0 || arg == NULL)
+ if (augment_size == 0 || addr == NULL)
continue;
- if (size == 1) { /* string */
- aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
- /* minimum of 0 to pass the verifier */
- if (aug_size < 0)
- aug_size = 0;
-
- augmented = true;
- } else if (size > 0 && size <= value_size) { /* struct */
- if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
- augmented = true;
- } else if (size < 0 && size >= -6) { /* buffer */
- index = -(size + 1);
- aug_size = args->args[index];
-
- if (aug_size > TRACE_AUG_MAX_BUF)
- aug_size = TRACE_AUG_MAX_BUF;
-
- if (aug_size > 0) {
- if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
- augmented = true;
- }
+ value_offset = ((struct augmented_arg *)payload_offset)->value;
+
+ if (augment_size == 1) { /* string */
+ augment_size = bpf_probe_read_user_str(value_offset, value_size, addr);
+ is_augmented = true;
+ } else if (augment_size > 1 && augment_size <= value_size) { /* struct */
+ if (!bpf_probe_read_user(value_offset, value_size, addr))
+ is_augmented = true;
+ } else if (augment_size < 0 && augment_size >= -6) { /* buffer */
+ int index = -(augment_size + 1);
+
+ augment_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
+ if (!bpf_probe_read_user(value_offset, augment_size, addr))
+ is_augmented = true;
}
- /* write data to payload */
- if (augmented) {
- int written = offsetof(struct augmented_arg, value) + aug_size;
+ /* Augmented data size is limited to value size */
+ if (augment_size > value_size)
+ augment_size = value_size;
+
+ /* Explicitly define this variable to pass the verifier */
+ augment_size_with_header = sizeof(u64) + augment_size;
- ((struct augmented_arg *)payload_offset)->size = aug_size;
- output += written;
- payload_offset += written;
- do_output = true;
+ /* Write data to payload */
+ if (is_augmented && augment_size_with_header <= sizeof(struct augmented_arg)) {
+ ((struct augmented_arg *)payload_offset)->size = augment_size;
+ do_augment = true;
+ len += augment_size_with_header;
+ payload_offset += augment_size_with_header;
}
}
- if (!do_output)
+ if (!do_augment || len > sizeof(struct beauty_payload_enter))
return 1;
- return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+ return augmented__beauty_output(ctx, payload, len);
}
SEC("tp/raw_syscalls/sys_enter")
--
2.43.0
Powered by blists - more mailing lists