[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3ee13371-0c7c-4264-b561-eceb4a7d7976@csgroup.eu>
Date: Tue, 28 Jan 2025 16:21:13 +0100
From: Christophe Leroy <christophe.leroy@...roup.eu>
To: "Dmitry V. Levin" <ldv@...ace.io>, Oleg Nesterov <oleg@...hat.com>
Cc: Alexey Gladkov <legion@...nel.org>, Charlie Jenkins
<charlie@...osinc.com>, Eugene Syromyatnikov <evgsyr@...il.com>,
Mike Frysinger <vapier@...too.org>, Renzo Davoli <renzo@...unibo.it>,
Davide Berardi <berardi.dav@...il.com>, strace-devel@...ts.strace.io,
linux-kernel@...r.kernel.org, linux-api@...r.kernel.org
Subject: Re: [PATCH v3 5/6] ptrace: introduce PTRACE_SET_SYSCALL_INFO request
Le 28/01/2025 à 10:16, Dmitry V. Levin a écrit :
> PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements
> PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of
> system calls the tracee is blocked in.
>
> This API allows ptracers to obtain and modify system call details
> in a straightforward and architecture-agnostic way.
>
> Current implementation supports changing only those bits of system call
> information that are used by strace, namely, syscall number, syscall
> arguments, and syscall return value.
How do you handle changes related to syscalls that call
force_successful_syscall_return() ?
>
> Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO,
> such as instruction pointer and stack pointer, could be added later if
> needed, by using struct ptrace_syscall_info.flags to specify the additional
> details that should be set. Currently, "flags", "reserved", and
> "seccomp.reserved2" fields of struct ptrace_syscall_info must be
> initialized with zeroes; "arch", "instruction_pointer", and "stack_pointer"
> fields are ignored.
>
> PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY,
> PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations.
> Other operations could be added later if needed.
>
> Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with
> PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that
> convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure
> to provide an API of changing the first system call argument on riscv
> architecture.
>
> ptrace(2) man page:
>
> long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
> ...
> PTRACE_SET_SYSCALL_INFO
> Modify information about the system call that caused the stop.
> The "data" argument is a pointer to struct ptrace_syscall_info
> that specifies the system call information to be set.
> The "addr" argument should be set to sizeof(struct ptrace_syscall_info)).
>
> Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
> Signed-off-by: Dmitry V. Levin <ldv@...ace.io>
> Reviewed-by: Alexey Gladkov <legion@...nel.org>
> Reviewed-by: Charlie Jenkins <charlie@...osinc.com>
> Tested-by: Charlie Jenkins <charlie@...osinc.com>
> ---
> include/uapi/linux/ptrace.h | 7 ++-
> kernel/ptrace.c | 121 +++++++++++++++++++++++++++++++++++-
> 2 files changed, 126 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index 72c038fc71d0..5f8ef6156752 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -74,6 +74,7 @@ struct seccomp_metadata {
> };
>
> #define PTRACE_GET_SYSCALL_INFO 0x420e
> +#define PTRACE_SET_SYSCALL_INFO 0x4212
> #define PTRACE_SYSCALL_INFO_NONE 0
> #define PTRACE_SYSCALL_INFO_ENTRY 1
> #define PTRACE_SYSCALL_INFO_EXIT 2
> @@ -81,7 +82,8 @@ struct seccomp_metadata {
>
> struct ptrace_syscall_info {
> __u8 op; /* PTRACE_SYSCALL_INFO_* */
> - __u8 pad[3];
> + __u8 reserved;
> + __u16 flags;
> __u32 arch;
> __u64 instruction_pointer;
> __u64 stack_pointer;
> @@ -98,6 +100,7 @@ struct ptrace_syscall_info {
> __u64 nr;
> __u64 args[6];
> __u32 ret_data;
> + __u32 reserved2;
> } seccomp;
> };
> };
> @@ -142,6 +145,8 @@ struct ptrace_sud_config {
> __u64 len;
> };
>
> +/* 0x4212 is PTRACE_SET_SYSCALL_INFO */
> +
> /*
> * These values are stored in task->ptrace_message
> * by ptrace_stop to describe the current syscall-stop.
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 22e7d74cf4cd..b9c1949186bf 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -944,7 +944,10 @@ ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
> ptrace_get_syscall_info_entry(child, regs, info);
> info->seccomp.ret_data = child->ptrace_message;
>
> - /* ret_data is the last field in struct ptrace_syscall_info.seccomp */
> + /*
> + * ret_data is the last non-reserved field
> + * in struct ptrace_syscall_info.seccomp
> + */
> return offsetofend(struct ptrace_syscall_info, seccomp.ret_data);
> }
>
> @@ -1016,6 +1019,118 @@ ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
> write_size = min(actual_size, user_size);
> return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size;
> }
> +
> +static int
> +ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
> + struct ptrace_syscall_info *info)
> +{
> + unsigned long args[ARRAY_SIZE(info->entry.args)];
> + int nr = info->entry.nr;
> + int i;
> +
> + /*
> + * Check that the syscall number specified in info->entry.nr
> + * is either a value of type "int" or a sign-extended value
> + * of type "int".
> + */
> + if (nr != info->entry.nr)
> + return -ERANGE;
> +
> + for (i = 0; i < ARRAY_SIZE(args); i++) {
> + args[i] = info->entry.args[i];
> + /*
> + * Check that the syscall argument specified in
> + * info->entry.args[i] is either a value of type
> + * "unsigned long" or a sign-extended value of type "long".
> + */
> + if (args[i] != info->entry.args[i])
> + return -ERANGE;
> + }
> +
> + syscall_set_nr(child, regs, nr);
> + /*
> + * If the syscall number is set to -1, setting syscall arguments is not
> + * just pointless, it would also clobber the syscall return value on
> + * those architectures that share the same register both for the first
> + * argument of syscall and its return value.
> + */
> + if (nr != -1)
> + syscall_set_arguments(child, regs, args);
> +
> + return 0;
> +}
> +
> +static int
> +ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
> + struct ptrace_syscall_info *info)
> +{
> + /*
> + * info->entry is currently a subset of info->seccomp,
> + * info->seccomp.ret_data is currently ignored.
> + */
> + return ptrace_set_syscall_info_entry(child, regs, info);
> +}
> +
> +static int
> +ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
> + struct ptrace_syscall_info *info)
> +{
> + long rval = info->exit.rval;
> +
> + /*
> + * Check that the return value specified in info->exit.rval
> + * is either a value of type "long" or a sign-extended value
> + * of type "long".
> + */
> + if (rval != info->exit.rval)
> + return -ERANGE;
> +
> + if (info->exit.is_error)
> + syscall_set_return_value(child, regs, rval, 0);
> + else
> + syscall_set_return_value(child, regs, 0, rval);
> +
> + return 0;
> +}
> +
> +static int
> +ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
> + const void __user *datavp)
> +{
> + struct pt_regs *regs = task_pt_regs(child);
> + struct ptrace_syscall_info info;
> +
> + if (user_size < sizeof(info))
> + return -EINVAL;
> +
> + /*
> + * The compatibility is tracked by info.op and info.flags: if user-space
> + * does not instruct us to use unknown extra bits from future versions
> + * of ptrace_syscall_info, we are not going to read them either.
> + */
> + if (copy_from_user(&info, datavp, sizeof(info)))
> + return -EFAULT;
> +
> + /* Reserved for future use. */
> + if (info.flags || info.reserved || info.seccomp.reserved2)
> + return -EINVAL;
> +
> + /* Changing the type of the system call stop is not supported yet. */
> + if (ptrace_get_syscall_info_op(child) != info.op)
> + return -EINVAL;
> +
> + switch (info.op) {
> + case PTRACE_SYSCALL_INFO_ENTRY:
> + return ptrace_set_syscall_info_entry(child, regs, &info);
> + case PTRACE_SYSCALL_INFO_EXIT:
> + return ptrace_set_syscall_info_exit(child, regs, &info);
> + case PTRACE_SYSCALL_INFO_SECCOMP:
> + return ptrace_set_syscall_info_seccomp(child, regs, &info);
> + default:
> + /* Other types of system call stops are not supported yet. */
> + return -EINVAL;
> + }
> +}
> #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
>
> int ptrace_request(struct task_struct *child, long request,
> @@ -1234,6 +1349,10 @@ int ptrace_request(struct task_struct *child, long request,
> case PTRACE_GET_SYSCALL_INFO:
> ret = ptrace_get_syscall_info(child, addr, datavp);
> break;
> +
> + case PTRACE_SET_SYSCALL_INFO:
> + ret = ptrace_set_syscall_info(child, addr, datavp);
> + break;
> #endif
>
> case PTRACE_SECCOMP_GET_FILTER:
Powered by blists - more mailing lists