Date:	Tue, 3 Apr 2007 15:47:29 +0400
From:	Oleg Nesterov <oleg@...sign.ru>
To:	Gautham R Shenoy <ego@...ibm.com>
Cc:	akpm@...ux-foundation.org, paulmck@...ibm.com,
	torvalds@...ux-foundation.org, linux-kernel@...r.kernel.org,
	vatsa@...ibm.com, "Rafael J. Wysocki" <rjw@...k.pl>, mingo@...e.hu,
	dipankar@...ibm.com, dino@...ibm.com,
	masami.hiramatsu.pt@...achi.com
Subject: Re: [PATCH 7/8] Clean up workqueue.c with respect to the freezer based cpu-hotplug

On 04/02, Gautham R Shenoy wrote:
>
> Clean up workqueue.c from the perspective of freezer-based cpu-hotplug.
> This patch

I'll study these patches later; for now, a couple of comments after a quick read.

>   This means that all non-singlethreaded workqueues *have* to
>   be frozen to avoid any races.

We can't freeze a workqueue if some work_struct continuously re-queues itself.
Perhaps this is not a problem (I don't see a good reason for such behaviour),
but it should be documented.
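
For example, something like this (a purely illustrative, hypothetical work item;
the names are made up) keeps cwq->worklist non-empty forever, so the worker
thread keeps looping inside run_workqueue() and never gets back to
try_to_freeze():

	#include <linux/workqueue.h>

	static struct workqueue_struct *my_wq;		/* hypothetical queue */

	static void rearm_fn(struct work_struct *work);
	static DECLARE_WORK(rearm_work, rearm_fn);

	static void rearm_fn(struct work_struct *work)
	{
		/* ... do some real work ... */

		/* immediately put ourselves back on the worklist */
		queue_work(my_wq, &rearm_work);
	}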

>  static int worker_thread(void *__cwq)
>  {
>  	struct cpu_workqueue_struct *cwq = __cwq;
> +	int bind_cpu;
>  	DEFINE_WAIT(wait);
>  	struct k_sigaction sa;
>
>  	freezer_exempt(cwq->wq->freeze_exempt_events);
> -
> +	bind_cpu = smp_processor_id();
>  	/*
>  	 * We inherited MPOL_INTERLEAVE from the booting kernel.
>  	 * Set MPOL_DEFAULT to insure node local allocations.
> @@ -308,20 +287,28 @@ static int worker_thread(void *__cwq)
>  	siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
>  	do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
>
> -	for (;;) {
> +	while (!kthread_should_stop()) {
>  		try_to_freeze();
> -
> +
> +		if (cpu_is_offline(bind_cpu) && !is_single_threaded(cwq->wq))
> +			goto wait_to_die;
> +
>  		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
> -		if (cwq->status == CWQ_RUNNING && list_empty(&cwq->worklist))
> +		if (list_empty(&cwq->worklist))
>  			schedule();
>  		finish_wait(&cwq->more_work, &wait);
>
> -		if (cwq_should_stop(cwq))
> -			break;
> -
>  		run_workqueue(cwq);
>  	}
>
> +wait_to_die:
> +	set_current_state(TASK_INTERRUPTIBLE);
> +	while(!kthread_should_stop()) {
> +		schedule();
> +		set_current_state(TASK_INTERRUPTIBLE);
> +	}
> +	__set_current_state(TASK_RUNNING);
> +
>  	return 0;
>  }

I still think that wait_to_die + bind_cpu is an unneeded complication.
Why can't we do the following:

	static int worker_thread(void *__cwq)
	{
		...

		for (;;) {
			try_to_freeze();

			prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
			if (!kthread_should_stop() && list_empty(&cwq->worklist))
				schedule();
			finish_wait(&cwq->more_work, &wait);

			if (kthread_should_stop())
				break;

			run_workqueue(cwq);
		}

		return 0;
	}

?

>  void fastcall flush_workqueue(struct workqueue_struct *wq)
>  {
> -	const cpumask_t *cpu_map = wq_cpu_map(wq);
>  	int cpu;
>
>  	might_sleep();
> -	for_each_cpu_mask(cpu, *cpu_map)
> +	for_each_online_cpu(cpu)
>  		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
>  }

Hm... I can't understand this change. I believe it is wrong.

> @@ -644,13 +630,6 @@ static int create_workqueue_thread(struc
>  		return PTR_ERR(p);
>
>  	cwq->thread = p;
> -	cwq->status = CWQ_RUNNING;
> -	if (!is_single_threaded(wq))
> -		kthread_bind(p, cpu);
> -
> -	if (is_single_threaded(wq) || cpu_online(cpu))
> -		wake_up_process(p);
> -
>  	return 0;
>  }

Well, this is a matter of taste, but I don't like this change. Now we
should add kthread_bind/wake_up_process calls to __create_workqueue()
and workqueue_cpu_callback(). I won't insist, though.

> @@ -680,15 +659,21 @@ static struct workqueue_struct *__create
>  	if (singlethread) {
>  		cwq = init_cpu_workqueue(wq, singlethread_cpu);
>  		err = create_workqueue_thread(cwq, singlethread_cpu);
> +		wake_up_process(cwq->thread);
>  	} else {
>  		mutex_lock(&workqueue_mutex);
>  		list_add(&wq->list, &workqueues);
>
> -		for_each_possible_cpu(cpu) {
> +		for_each_online_cpu(cpu) {

This is wrong. CPU_UP_PREPARE doesn't call init_cpu_workqueue().
Easy to fix, but I personally think it is better to initialize
the whole cpu_possible_map.
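
Something along those lines, i.e. a rough sketch of that alternative (error
handling omitted; it assumes init_cpu_workqueue() stays cheap enough to run
for every possible CPU):

	for_each_possible_cpu(cpu) {
		/* always set up the per-cpu data: lock, worklist, ... */
		cwq = init_cpu_workqueue(wq, cpu);
		if (!cpu_online(cpu))
			continue;	/* thread gets created when the CPU comes up */
		err = create_workqueue_thread(cwq, cpu);
		if (err)
			break;
		kthread_bind(cwq->thread, cpu);
		wake_up_process(cwq->thread);
	}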

>  static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
>  {
> -	struct wq_barrier barr;
> -	int alive = 0;
>
> -	spin_lock_irq(&cwq->lock);
>  	if (cwq->thread != NULL) {
> -		insert_wq_barrier(cwq, &barr, 1);
> -		cwq->status = CWQ_SHOULD_STOP;
> -		alive = 1;
> -	}
> -	spin_unlock_irq(&cwq->lock);
> -
> -	if (alive) {
>  		thaw_process(cwq->thread);
> -		wait_for_completion(&barr.done);
> -
> -		while (unlikely(cwq->status != CWQ_STOPPED))
> -			cpu_relax();
> -		/*
> -		 * Wait until cwq->thread unlocks cwq->lock,
> -		 * it won't touch *cwq after that.
> -		 */
> -		smp_rmb();
> +		kthread_stop(cwq->thread);
>  		cwq->thread = NULL;
> -		spin_unlock_wait(&cwq->lock);
>  	}
>  }

Deadlockable. Suppose that freezing is in progress and cwq->thread is not
frozen yet. cleanup_workqueue_thread() calls thaw_process(cwq->thread),
then cwq->thread goes into the refrigerator anyway, and kthread_stop()
blocks forever.
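
In other words, the window in the patched cleanup path above looks roughly
like this (the same code as in the hunk, just annotated):

	if (cwq->thread != NULL) {
		thaw_process(cwq->thread);
		/*
		 * If a freeze is in progress, cwq->thread can hit
		 * try_to_freeze() right here and go back into the
		 * refrigerator...
		 */
		kthread_stop(cwq->thread);
		/*
		 * ...and kthread_stop() then waits forever, because the
		 * frozen thread never runs again to notice
		 * kthread_should_stop() and exit.
		 */
		cwq->thread = NULL;
	}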

> +static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
> +{
> +	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> +	struct list_head list;
> +	struct work_struct *work;
> +
> +	spin_lock_irq(&cwq->lock);

This CPU is dead (or the hotplug operation was cancelled), so we don't need cwq->lock.

>  static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
>  						unsigned long action,
>  						void *hcpu)
> @@ -782,11 +768,6 @@ static int __devinit workqueue_cpu_callb
>  	struct cpu_workqueue_struct *cwq;
>  	struct workqueue_struct *wq;
>
> -	switch (action) {
> -	case CPU_UP_PREPARE:
> -		cpu_set(cpu, cpu_populated_map);
> -	}
> -
>  	mutex_lock(&workqueue_mutex);
>  	list_for_each_entry(wq, &workqueues, list) {
>  		cwq = per_cpu_ptr(wq->cpu_wq, cpu);
> @@ -799,6 +780,7 @@ static int __devinit workqueue_cpu_callb
>  			return NOTIFY_BAD;
>
>  		case CPU_ONLINE:
> +			kthread_bind(cwq->thread, cpu);
>  			wake_up_process(cwq->thread);
>  			break;
>
> @@ -806,6 +788,7 @@ static int __devinit workqueue_cpu_callb
>  			if (cwq->thread)
>  				wake_up_process(cwq->thread);
>  		case CPU_DEAD:
> +			take_over_work(wq, cpu);
>  			cleanup_workqueue_thread(cwq, cpu);
>  			break;
>  		}

This means that a work_struct on a single_threaded wq can't use any of

	__create_workqueue()
	destroy_workqueue()
	flush_workqueue()
	cancel_work_sync()

They are all racy wrt workqueue_cpu_callback(), and we don't freeze
single_threaded workqueues. This is bad.

Probably we should:

	- freeze all workqueues, even the single_threaded ones.

	- have helper_init() explicitly do __create_workqueue(FE_ALL)
	  (see the sketch below); this means that we should never use
	  the functions above with this workqueue.
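
A sketch of that second point; the exact __create_workqueue() signature in
this patch series and the meaning of FE_ALL ("exempt from all freeze events")
are assumptions on my part:

	/* kernel/kmod.c */
	static __init int helper_init(void)
	{
		/*
		 * Single-threaded and explicitly exempt from every freeze
		 * event, so flush_workqueue()/destroy_workqueue()/
		 * cancel_work_sync() must never be used on it concurrently
		 * with cpu-hotplug.
		 */
		khelper_wq = __create_workqueue("khelper", 1, FE_ALL);
		BUG_ON(!khelper_wq);
		return 0;
	}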

Oleg.
