lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <47697d5f8d557113244b7c044251fe09@paul-moore.com>
Date: Tue, 10 Sep 2024 17:07:57 -0400
From: Paul Moore <paul@...l-moore.com>
To: Jann Horn <jannh@...gle.com>, James Morris <jmorris@...ei.org>, "Serge E. Hallyn" <serge@...lyn.com>, John Johansen <john.johansen@...onical.com>, David Howells <dhowells@...hat.com>, Jarkko Sakkinen <jarkko@...nel.org>, Mickaël Salaün <mic@...ikod.net>, Günther Noack <gnoack@...gle.com>, Stephen Smalley <stephen.smalley.work@...il.com>, Ondrej Mosnacek <omosnace@...hat.com>, Casey Schaufler <casey@...aufler-ca.com>
Cc: linux-kernel@...r.kernel.org, linux-security-module@...r.kernel.org, apparmor@...ts.ubuntu.com, keyrings@...r.kernel.org, selinux@...r.kernel.org, Jann Horn <jannh@...gle.com>
Subject: Re: [PATCH v2 1/2] KEYS: use synchronous task work for changing parent  credentials

On Aug  5, 2024 Jann Horn <jannh@...gle.com> wrote:
> 
> keyctl_session_to_parent() involves posting task work to the parent task,
> with work function key_change_session_keyring.
> Because the task work in the parent runs asynchronously, no errors can be
> returned back to the caller of keyctl_session_to_parent(), and therefore
> the work function key_change_session_keyring() can't be allowed to fail due
> to things like memory allocation failure or permission checks - all
> allocations and checks have to happen in the child.
> 
> This is annoying for two reasons:
> 
>  - It is the only reason why cred_alloc_blank() and
>    security_transfer_creds() are necessary.
>  - It means we can't do synchronous permission checks.
> 
> Rewrite keyctl_session_to_parent() to run task work on the parent
> synchronously, so that any errors that happen in the task work can be
> plumbed back into the syscall return value in the child.
> This allows us to get rid of cred_alloc_blank() and
> security_transfer_creds() in a later commit, and it will make it possible
> to write more reliable security checks for this operation.
> 
> Note that this requires using TWA_SIGNAL instead of TWA_RESUME, so the
> parent might observe some spurious -EAGAIN syscall returns or such; but the
> parent likely anyway has to be ready to deal with the side effects of
> receiving signals (since it'll probably get SIGCHLD when the child dies),
> so that probably isn't an issue.
> 
> Signed-off-by: Jann Horn <jannh@...gle.com>
> ---
>  security/keys/internal.h     |   8 ++++
>  security/keys/keyctl.c       | 107 +++++++++++++------------------------------
>  security/keys/process_keys.c |  86 ++++++++++++++++++----------------
>  3 files changed, 87 insertions(+), 114 deletions(-)

...

> diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
> index ab927a142f51..e4cfe5c4594a 100644
> --- a/security/keys/keyctl.c
> +++ b/security/keys/keyctl.c
> @@ -1616,104 +1616,63 @@ long keyctl_get_security(key_serial_t keyid,
>   * parent process.
>   *
>   * The keyring must exist and must grant the caller LINK permission, and the
>   * parent process must be single-threaded and must have the same effective
>   * ownership as this process and mustn't be SUID/SGID.
>   *
> - * The keyring will be emplaced on the parent when it next resumes userspace.
> + * The keyring will be emplaced on the parent via a pseudo-signal.
>   *
>   * If successful, 0 will be returned.
>   */
>  long keyctl_session_to_parent(void)
>  {
> -	struct task_struct *me, *parent;
> -	const struct cred *mycred, *pcred;
> -	struct callback_head *newwork, *oldwork;
> +	struct keyctl_session_to_parent_context ctx;
> +	struct task_struct *parent;
>  	key_ref_t keyring_r;
> -	struct cred *cred;
>  	int ret;
>  
>  	keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK);
>  	if (IS_ERR(keyring_r))
>  		return PTR_ERR(keyring_r);
>  
> -	ret = -ENOMEM;
> -
> -	/* our parent is going to need a new cred struct, a new tgcred struct
> -	 * and new security data, so we allocate them here to prevent ENOMEM in
> -	 * our parent */
> -	cred = cred_alloc_blank();
> -	if (!cred)
> -		goto error_keyring;
> -	newwork = &cred->rcu;
> +	write_lock_irq(&tasklist_lock);
> +	parent = get_task_struct(rcu_dereference_protected(current->real_parent,
> +					lockdep_is_held(&tasklist_lock)));
> +	write_unlock_irq(&tasklist_lock);
>  
> -	cred->session_keyring = key_ref_to_ptr(keyring_r);
> -	keyring_r = NULL;
> -	init_task_work(newwork, key_change_session_keyring);
> +	/* the parent mustn't be init and mustn't be a kernel thread */
> +	if (is_global_init(parent) || (READ_ONCE(parent->flags) & PF_KTHREAD) != 0)
> +		goto put_task;

I think we need to explicitly set @ret if we are failing here, yes?
  
> -	me = current;
> -	rcu_read_lock();
> -	write_lock_irq(&tasklist_lock);
> +	ctx.new_session_keyring = key_ref_to_ptr(keyring_r);
> +	ctx.child_cred = current_cred();
> +	init_completion(&ctx.done);
> +	init_task_work(&ctx.work, key_change_session_keyring);
> +	ret = task_work_add(parent, &ctx.work, TWA_SIGNAL);
> +	if (ret)
> +		goto put_task;
>  
> -	ret = -EPERM;
> -	oldwork = NULL;
> -	parent = rcu_dereference_protected(me->real_parent,
> -					   lockdep_is_held(&tasklist_lock));
> +	ret = wait_for_completion_interruptible(&ctx.done);
>  
> -	/* the parent mustn't be init and mustn't be a kernel thread */
> -	if (parent->pid <= 1 || !parent->mm)
> -		goto unlock;
> -
> -	/* the parent must be single threaded */
> -	if (!thread_group_empty(parent))
> -		goto unlock;
> -
> -	/* the parent and the child must have different session keyrings or
> -	 * there's no point */
> -	mycred = current_cred();
> -	pcred = __task_cred(parent);
> -	if (mycred == pcred ||
> -	    mycred->session_keyring == pcred->session_keyring) {
> -		ret = 0;
> -		goto unlock;
> +	if (task_work_cancel(parent, &ctx.work)) {
> +		/*
> +		 * We got interrupted and the task work was canceled before it
> +		 * could execute.
> +		 * Use -ERESTARTNOINTR instead of -ERESTARTSYS for
> +		 * compatibility - the manpage does not list -EINTR as a
> +		 * possible error for keyctl().
> +		 */
> +		ret = -ERESTARTNOINTR;
> +	} else {
> +		/* task work is running or has been executed */
> +		wait_for_completion(&ctx.done);
> +		ret = ctx.result;
>  	}
>  
> -	/* the parent must have the same effective ownership and mustn't be
> -	 * SUID/SGID */
> -	if (!uid_eq(pcred->uid,	 mycred->euid) ||
> -	    !uid_eq(pcred->euid, mycred->euid) ||
> -	    !uid_eq(pcred->suid, mycred->euid) ||
> -	    !gid_eq(pcred->gid,	 mycred->egid) ||
> -	    !gid_eq(pcred->egid, mycred->egid) ||
> -	    !gid_eq(pcred->sgid, mycred->egid))
> -		goto unlock;
> -
> -	/* the keyrings must have the same UID */
> -	if ((pcred->session_keyring &&
> -	     !uid_eq(pcred->session_keyring->uid, mycred->euid)) ||
> -	    !uid_eq(mycred->session_keyring->uid, mycred->euid))
> -		goto unlock;
> -
> -	/* cancel an already pending keyring replacement */
> -	oldwork = task_work_cancel_func(parent, key_change_session_keyring);
> -
> -	/* the replacement session keyring is applied just prior to userspace
> -	 * restarting */
> -	ret = task_work_add(parent, newwork, TWA_RESUME);
> -	if (!ret)
> -		newwork = NULL;
> -unlock:
> -	write_unlock_irq(&tasklist_lock);
> -	rcu_read_unlock();
> -	if (oldwork)
> -		put_cred(container_of(oldwork, struct cred, rcu));
> -	if (newwork)
> -		put_cred(cred);
> -	return ret;
> -
> -error_keyring:
> +put_task:
> +	put_task_struct(parent);
>  	key_ref_put(keyring_r);
>  	return ret;
>  }

--
paul-moore.com

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ