lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190902134003.GA14770@redhat.com>
Date:   Mon, 2 Sep 2019 15:40:03 +0200
From:   Oleg Nesterov <oleg@...hat.com>
To:     "Eric W. Biederman" <ebiederm@...ssion.com>
Cc:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Russell King - ARM Linux admin <linux@...linux.org.uk>,
        Peter Zijlstra <peterz@...radead.org>,
        Chris Metcalf <cmetcalf@...hip.com>,
        Christoph Lameter <cl@...ux.com>,
        Kirill Tkhai <tkhai@...dex.ru>, Mike Galbraith <efault@....de>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...nel.org>,
        Linux List Kernel Mailing <linux-kernel@...r.kernel.org>
Subject: Re: [BUG] Use of probe_kernel_address() in task_rcu_dereference()
 without checking return value

On 08/30, Eric W. Biederman wrote:
>
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -182,6 +182,24 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
>  	put_task_struct(tsk);
>  }
>  
> +void put_dead_task_struct(struct task_struct *task)
> +{
> +	bool delay = false;
> +	unsigned long flags;
> +
> +	/* Is the task both reaped and no longer being scheduled? */
> +	raw_spin_lock_irqsave(&task->pi_lock, flags);
> +	if ((task->state == TASK_DEAD) &&
> +	    (cmpxchg(&task->exit_state, EXIT_DEAD, EXIT_RCU) == EXIT_DEAD))
> +		delay = true;
> +	raw_spin_lock_irqrestore(&task->pi_lock, flags);
> +
> +	/* If both are true use rcu delay the put_task_struct */
> +	if (delay)
> +		call_rcu(&task->rcu, delayed_put_task_struct);
> +	else
> +		put_task_struct(task);
> +}
>  
>  void release_task(struct task_struct *p)
>  {
> @@ -222,76 +240,13 @@ void release_task(struct task_struct *p)
>  
>  	write_unlock_irq(&tasklist_lock);
>  	release_thread(p);
> -	call_rcu(&p->rcu, delayed_put_task_struct);
> +	put_dead_task_struct(p);

I had a similar change in mind; see below. This is subjective, but to me
it looks simpler and cleaner.

Oleg.

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8dc1811..1f9b021 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1134,7 +1134,10 @@ struct task_struct {
 
 	struct tlbflush_unmap_batch	tlb_ubc;
 
-	struct rcu_head			rcu;
+	union {
+		bool			xxx;
+		struct rcu_head		rcu;
+	};
 
 	/* Cache last used pipe for splice(): */
 	struct pipe_inode_info		*splice_pipe;
diff --git a/kernel/exit.c b/kernel/exit.c
index a75b6a7..baacfce 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	put_task_struct(tsk);
 }
 
+void call_delayed_put_task_struct(struct task_struct *p)
+{
+	if (xchg(&p->xxx, 1))
+		call_rcu(&p->rcu, delayed_put_task_struct);
+}
 
 void release_task(struct task_struct *p)
 {
@@ -222,7 +227,7 @@ void release_task(struct task_struct *p)
 
 	write_unlock_irq(&tasklist_lock);
 	release_thread(p);
-	call_rcu(&p->rcu, delayed_put_task_struct);
+	call_delayed_put_task_struct(p);
 
 	p = leader;
 	if (unlikely(zap_leader))
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1..e90f6de 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -900,11 +900,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (orig->cpus_ptr == &orig->cpus_mask)
 		tsk->cpus_ptr = &tsk->cpus_mask;
 
-	/*
-	 * One for us, one for whoever does the "release_task()" (usually
-	 * parent)
-	 */
-	refcount_set(&tsk->usage, 2);
+	refcount_set(&tsk->usage, 1);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f1..e77389c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3135,7 +3135,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		/* Task is done with its stack. */
 		put_task_stack(prev);
 
-		put_task_struct(prev);
+		call_delayed_put_task_struct(prev);
 	}
 
 	tick_nohz_task_switch();

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ