Message-ID: <CAJhGHyCk_cjD-Ex_OAd=DrBkTEe0Xvs81=On65Sp7sS8zNBfGQ@mail.gmail.com>
Date: Thu, 1 Feb 2024 19:02:27 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: Tejun Heo <tj@...nel.org>
Cc: torvalds@...ux-foundation.org, mpatocka@...hat.com,
linux-kernel@...r.kernel.org, dm-devel@...ts.linux.dev, msnitzer@...hat.com,
ignat@...udflare.com, damien.lemoal@....com, bob.liu@...cle.com,
houtao1@...wei.com, peterz@...radead.org, mingo@...nel.org,
netdev@...r.kernel.org, allen.lkml@...il.com, kernel-team@...a.com
Subject: Re: [PATCH 3/8] workqueue: Implement BH workqueues to eventually
replace tasklets
Hello, Tejun
On Tue, Jan 30, 2024 at 5:16 PM Tejun Heo <tj@...nel.org> wrote:
> @@ -1184,6 +1211,14 @@ static bool kick_pool(struct worker_pool *pool)
> if (!need_more_worker(pool) || !worker)
> return false;
>
> + if (pool->flags & POOL_BH) {
> + if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
> + tasklet_hi_schedule(&worker->bh_tasklet);
> + else
> + tasklet_schedule(&worker->bh_tasklet);
> + return true;
> + }
I think it is more straightforward to call bh_worker_taskletfn[_hi]()
from tasklet_action() and tasklet_hi_action() rather than adding a
worker->bh_tasklet.
raise_softirq_irqoff() can be used here (kick_pool()) instead.
As the changelog says, once the conversion is complete, tasklets can be
removed and BH workqueues can directly take over the tasklet softirqs;
at that point, bh_worker_taskletfn[_hi]() can directly take over from
tasklet_action() and tasklet_hi_action().
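
A rough, untested sketch of what the kick_pool() hunk could look like
with that approach (it assumes kick_pool() runs with interrupts
disabled and on the CPU whose BH pool is being kicked, which the
tasklet-based version above also assumes):

	if (pool->flags & POOL_BH) {
		/* pool->lock is held with irqs off, so _irqoff is safe */
		if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
			raise_softirq_irqoff(HI_SOFTIRQ);
		else
			raise_softirq_irqoff(TASKLET_SOFTIRQ);
		return true;
	}

The BH pools would then be serviced directly from tasklet_action() and
tasklet_hi_action() until the tasklet code is removed.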
> +
> p = worker->task;
>
> #ifdef CONFIG_SMP
> @@ -1663,8 +1698,16 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
> lockdep_assert_held(&pool->lock);
>
> if (!nna) {
> - /* per-cpu workqueue, pwq->nr_active is sufficient */
> - obtained = pwq->nr_active < READ_ONCE(wq->max_active);
> + /*
> + * BH workqueues always share a single execution context per CPU
> + * and don't impose any max_active limit, so tryinc always
> + * succeeds. For a per-cpu workqueue, checking pwq->nr_active is
> + * sufficient.
> + */
> + if (wq->flags & WQ_BH)
> + obtained = true;
> + else
> + obtained = pwq->nr_active < READ_ONCE(wq->max_active);
I think wq->max_active can be forced to be UINT_MAX or ULONG_MAX
in the max_active management code to avoid a branch here.
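
Hypothetical sketch of that; the exact spot depends on the max_active
rework, and any "effectively unlimited" value works as long as
pwq->nr_active can never reach it:

	/* e.g. when initializing max_active in alloc_workqueue() */
	if (wq->flags & WQ_BH)
		wq->max_active = UINT_MAX;	/* BH pwqs never throttle */

With that, the existing "pwq->nr_active < READ_ONCE(wq->max_active)"
test always succeeds for BH workqueues and the extra branch in
pwq_tryinc_nr_active() goes away.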
> goto out;
> }
>
> @@ -2599,27 +2642,31 @@ static struct worker *create_worker(struct worker_pool *pool)
>
> worker->id = id;
>
> - if (pool->cpu >= 0)
> - snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
> - pool->attrs->nice < 0 ? "H" : "");
> - else
> - snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
> -
> - worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
> - "kworker/%s", id_buf);
> - if (IS_ERR(worker->task)) {
> - if (PTR_ERR(worker->task) == -EINTR) {
> - pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
> - id_buf);
> - } else {
> - pr_err_once("workqueue: Failed to create a worker thread: %pe",
> - worker->task);
> + if (pool->flags & POOL_BH) {
> + tasklet_setup(&worker->bh_tasklet, bh_worker_taskletfn);
> + } else {
> + if (pool->cpu >= 0)
> + snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
> + pool->attrs->nice < 0 ? "H" : "");
> + else
> + snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
> +
> + worker->task = kthread_create_on_node(worker_thread, worker,
> + pool->node, "kworker/%s", id_buf);
> + if (IS_ERR(worker->task)) {
> + if (PTR_ERR(worker->task) == -EINTR) {
> + pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
> + id_buf);
> + } else {
> + pr_err_once("workqueue: Failed to create a worker thread: %pe",
> + worker->task);
> + }
> + goto fail;
> }
> - goto fail;
> - }
>
> - set_user_nice(worker->task, pool->attrs->nice);
> - kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
> + set_user_nice(worker->task, pool->attrs->nice);
> + kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
> + }
>
> /* successful, attach the worker to the pool */
> worker_attach_to_pool(worker, pool);
worker_attach_to_pool() and worker_detach_from_pool() also access
worker->task when calling kthread_set_per_cpu(), and it is only by luck
that to_kthread() checks for a NULL pointer there.
IMO, it is better to avoid calling kthread_set_per_cpu() for BH workers,
for example as below.
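
Untested sketch, reusing the POOL_BH flag from this series:

	/* in worker_attach_to_pool() */
	if (pool->flags & POOL_DISASSOCIATED)
		worker->flags |= WORKER_UNBOUND;
	else if (!(pool->flags & POOL_BH))
		kthread_set_per_cpu(worker->task, pool->cpu);

	/* in worker_detach_from_pool() */
	if (!(pool->flags & POOL_BH))
		kthread_set_per_cpu(worker->task, -1);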
> @@ -5605,7 +5731,12 @@ static void pr_cont_pool_info(struct worker_pool *pool)
> pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
> if (pool->node != NUMA_NO_NODE)
> pr_cont(" node=%d", pool->node);
> - pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
> + pr_cont(" flags=0x%x", pool->flags);
> + if (pool->flags & POOL_BH)
> + pr_cont(" bh%s",
> + pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
> + else
> + pr_cont(" nice=%d", pool->attrs->nice);
There are also uses of "worker->task" in show_pwq(),
show_one_worker_pool(), and show_cpu_pool_hog() that need taking care
of, e.g. as below.
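
A NULL check along these lines could work (hypothetical; each of the
dump helpers could guard similarly before dereferencing the task):

	/* e.g. in show_cpu_pool_hog(): BH workers have no backing task */
	if (!worker->task)
		continue;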
Thanks
Lai