lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120907230556.GJ9426@google.com>
Date:	Fri, 7 Sep 2012 16:05:56 -0700
From:	Tejun Heo <tj@...nel.org>
To:	Lai Jiangshan <laijs@...fujitsu.com>
Cc:	linux-kernel@...r.kernel.org
Subject: Re: [PATCH wq/for-3.6-fixes 3/3] workqueue: fix possible idle
 worker depletion during CPU_ONLINE

I got it down to the following but it creates a problem where CPU
hotplug queues a work item on worker->scheduled before the execution
loops starts.  :(

Need to think more about it.

 kernel/workqueue.c |   63 ++++++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dc7b845..4c7502d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
 
 	/* pool flags */
 	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
+	POOL_MANAGING_WORKERS	= 1 << 1,
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -165,7 +166,7 @@ struct worker_pool {
 	struct timer_list	idle_timer;	/* L: worker idle timeout */
 	struct timer_list	mayday_timer;	/* L: SOS timer for workers */
 
-	struct mutex		manager_mutex;	/* mutex manager should hold */
+	struct mutex		hotplug_mutex;	/* mutex manager should hold */
 	struct ida		worker_ida;	/* L: for worker IDs */
 };
 
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_mutex);
+	bool managing = pool->flags & POOL_MANAGING_WORKERS;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1390,7 +1391,7 @@ static void rebind_workers(struct global_cwq *gcwq)
 	lockdep_assert_held(&gcwq->lock);
 
 	for_each_worker_pool(pool, gcwq)
-		lockdep_assert_held(&pool->manager_mutex);
+		lockdep_assert_held(&pool->hotplug_mutex);
 
 	/*
 	 * Rebind idle workers.  Interlocked both ways.  We wait for
@@ -1713,22 +1714,16 @@ static void gcwq_mayday_timeout(unsigned long __pool)
  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
  * multiple times.  Does GFP_KERNEL allocations.  Called only from
  * manager.
- *
- * RETURNS:
- * false if no action was taken and gcwq->lock stayed locked, true
- * otherwise.
  */
-static bool maybe_create_worker(struct worker_pool *pool)
-__releases(&gcwq->lock)
-__acquires(&gcwq->lock)
+static void maybe_create_worker(struct worker_pool *pool)
 {
 	struct global_cwq *gcwq = pool->gcwq;
 
+	spin_lock_irq(&gcwq->lock);
 	if (!need_to_create_worker(pool))
-		return false;
+		goto out_unlock;
 restart:
 	spin_unlock_irq(&gcwq->lock);
-
 	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
 	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
 
@@ -1741,7 +1736,7 @@ restart:
 			spin_lock_irq(&gcwq->lock);
 			start_worker(worker);
 			BUG_ON(need_to_create_worker(pool));
-			return true;
+			goto out_unlock;
 		}
 
 		if (!need_to_create_worker(pool))
@@ -1758,7 +1753,8 @@ restart:
 	spin_lock_irq(&gcwq->lock);
 	if (need_to_create_worker(pool))
 		goto restart;
-	return true;
+out_unlock:
+	spin_unlock_irq(&gcwq->lock);
 }
 
 /**
@@ -1771,15 +1767,9 @@ restart:
  * LOCKING:
  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
  * multiple times.  Called only from manager.
- *
- * RETURNS:
- * false if no action was taken and gcwq->lock stayed locked, true
- * otherwise.
  */
-static bool maybe_destroy_workers(struct worker_pool *pool)
+static void maybe_destroy_workers(struct worker_pool *pool)
 {
-	bool ret = false;
-
 	while (too_many_workers(pool)) {
 		struct worker *worker;
 		unsigned long expires;
@@ -1793,10 +1783,7 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
 		}
 
 		destroy_worker(worker);
-		ret = true;
 	}
-
-	return ret;
 }
 
 /**
@@ -1820,24 +1807,32 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
  * some action was taken.
  */
 static bool manage_workers(struct worker *worker)
+	__releases(&gcwq->lock) __acquires(&gcwq->lock)
 {
 	struct worker_pool *pool = worker->pool;
-	bool ret = false;
+	struct global_cwq *gcwq = pool->gcwq;
 
-	if (!mutex_trylock(&pool->manager_mutex))
-		return ret;
+	if (pool->flags & POOL_MANAGING_WORKERS)
+		return false;
 
 	pool->flags &= ~POOL_MANAGE_WORKERS;
 
+	spin_unlock_irq(&gcwq->lock);
+
+	/* blah blah */
+	mutex_lock(&pool->hotplug_mutex);
+
 	/*
 	 * Destroy and then create so that may_start_working() is true
 	 * on return.
 	 */
-	ret |= maybe_destroy_workers(pool);
-	ret |= maybe_create_worker(pool);
+	maybe_destroy_workers(pool);
+	maybe_create_worker(pool);
 
-	mutex_unlock(&pool->manager_mutex);
-	return ret;
+	mutex_unlock(&pool->hotplug_mutex);
+
+	spin_lock_irq(&gcwq->lock);
+	return true;
 }
 
 /**
@@ -3399,7 +3394,7 @@ static void gcwq_claim_management_and_lock(struct global_cwq *gcwq)
 	struct worker_pool *pool;
 
 	for_each_worker_pool(pool, gcwq)
-		mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools);
+		mutex_lock_nested(&pool->hotplug_mutex, pool - gcwq->pools);
 	spin_lock_irq(&gcwq->lock);
 }
 
@@ -3410,7 +3405,7 @@ static void gcwq_release_management_and_unlock(struct global_cwq *gcwq)
 
 	spin_unlock_irq(&gcwq->lock);
 	for_each_worker_pool(pool, gcwq)
-		mutex_unlock(&pool->manager_mutex);
+		mutex_unlock(&pool->hotplug_mutex);
 }
 
 static void gcwq_unbind_fn(struct work_struct *work)
@@ -3749,7 +3744,7 @@ static int __init init_workqueues(void)
 			setup_timer(&pool->mayday_timer, gcwq_mayday_timeout,
 				    (unsigned long)pool);
 
-			mutex_init(&pool->manager_mutex);
+			mutex_init(&pool->hotplug_mutex);
 			ida_init(&pool->worker_ida);
 		}
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ