[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250611131438.651493-8-marco.crivellari@suse.com>
Date: Wed, 11 Jun 2025 15:14:36 +0200
From: Marco Crivellari <marco.crivellari@...e.com>
To: linux-kernel@...r.kernel.org
Cc: Tejun Heo <tj@...nel.org>,
Lai Jiangshan <jiangshanlai@...il.com>,
Thomas Gleixner <tglx@...utronix.de>,
Frederic Weisbecker <frederic@...nel.org>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Marco Crivellari <marco.crivellari@...e.com>,
Michal Hocko <mhocko@...e.com>
Subject: [PATCH v3 7/9] Workqueue: fs: WQ_PERCPU added to alloc_workqueue users
Currently if a user enqueue a work item using schedule_delayed_work() the
used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use
WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
schedule_work() that is using system_wq and queue_work(), that makes use
again of WORK_CPU_UNBOUND.
This lack of consistentcy cannot be addressed without refactoring the API.
alloc_workqueue() treats all queues as per-CPU by default, while unbound
workqueues must opt-in via WQ_UNBOUND.
This default is suboptimal: most workloads benefit from unbound queues,
allowing the scheduler to place worker threads where they’re needed and
reducing noise when CPUs are isolated.
This default is suboptimal: most workloads benefit from unbound queues,
allowing the scheduler to place worker threads where they’re needed and
reducing noise when CPUs are isolated.
This patch adds a new WQ_PERCPU flag to all the fs subsystem users to
explicitly request the use of the per-CPU behavior. Both flags coexist
for one release cycle to allow callers to transition their calls.
Once migration is complete, WQ_UNBOUND can be removed and unbound will
become the implicit default.
With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND),
any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND
must now use WQ_PERCPU.
All existing users have been updated accordingly.
Suggested-by: Tejun Heo <tj@...nel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@...e.com>
---
fs/afs/main.c | 4 ++--
fs/bcachefs/super.c | 10 +++++-----
fs/btrfs/async-thread.c | 3 +--
fs/btrfs/disk-io.c | 2 +-
fs/ceph/super.c | 2 +-
fs/dlm/lowcomms.c | 2 +-
fs/dlm/main.c | 2 +-
fs/fs-writeback.c | 2 +-
fs/gfs2/main.c | 5 +++--
fs/gfs2/ops_fstype.c | 6 ++++--
fs/ocfs2/dlm/dlmdomain.c | 3 ++-
fs/ocfs2/dlmfs/dlmfs.c | 3 ++-
fs/smb/client/cifsfs.c | 16 +++++++++++-----
fs/smb/server/ksmbd_work.c | 2 +-
fs/smb/server/transport_rdma.c | 3 ++-
fs/super.c | 3 ++-
fs/verity/verify.c | 2 +-
fs/xfs/xfs_log.c | 3 +--
fs/xfs/xfs_mru_cache.c | 3 ++-
fs/xfs/xfs_super.c | 15 ++++++++-------
20 files changed, 52 insertions(+), 39 deletions(-)
diff --git a/fs/afs/main.c b/fs/afs/main.c
index c845c5daaeba..6b7aab6abd78 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -168,13 +168,13 @@ static int __init afs_init(void)
printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
- afs_wq = alloc_workqueue("afs", 0, 0);
+ afs_wq = alloc_workqueue("afs", WQ_PERCPU, 0);
if (!afs_wq)
goto error_afs_wq;
afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!afs_async_calls)
goto error_async;
- afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
+ afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!afs_lock_manager)
goto error_lockmgr;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index a58edde43bee..8bba5347a36e 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -909,15 +909,15 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+ WQ_HIGHPRI | WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+ WQ_HIGHPRI | WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE | WQ_PERCPU, 1)) ||
!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
+ WQ_HIGHPRI | WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, 512)) ||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
- WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+ WQ_HIGHPRI | WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
- WQ_FREEZABLE, 0)) ||
+ WQ_FREEZABLE | WQ_PERCPU, 0)) ||
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index f3bffe08b290..0a84d86a942d 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -109,8 +109,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
ret->thresh = thresh;
}
- ret->normal_wq = alloc_workqueue("btrfs-%s", flags, ret->current_active,
- name);
+ ret->normal_wq = alloc_workqueue("btrfs-%s", flags, ret->current_active, name);
if (!ret->normal_wq) {
kfree(ret);
return NULL;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3dd555db3d32..f817b29a43de 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1963,7 +1963,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
- unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
+ unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU;
fs_info->workers =
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f3951253e393..a0302a004157 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -862,7 +862,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
if (!fsc->inode_wq)
goto fail_client;
- fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
+ fsc->cap_wq = alloc_workqueue("ceph-cap", WQ_PERCPU, 1);
if (!fsc->cap_wq)
goto fail_inode_wq;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 70abd4da17a6..6ced1fa90209 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1702,7 +1702,7 @@ static int work_start(void)
return -ENOMEM;
}
- process_workqueue = alloc_workqueue("dlm_process", WQ_HIGHPRI | WQ_BH, 0);
+ process_workqueue = alloc_workqueue("dlm_process", WQ_HIGHPRI | WQ_BH | WQ_PERCPU, 0);
if (!process_workqueue) {
log_print("can't start dlm_process");
destroy_workqueue(io_workqueue);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 4887c8a05318..a44d16da7187 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -52,7 +52,7 @@ static int __init init_dlm(void)
if (error)
goto out_user;
- dlm_wq = alloc_workqueue("dlm_wq", 0, 0);
+ dlm_wq = alloc_workqueue("dlm_wq", WQ_PERCPU, 0);
if (!dlm_wq) {
error = -ENOMEM;
goto out_plock;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cf51a265bf27..4b1a53a3266b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1180,7 +1180,7 @@ void cgroup_writeback_umount(struct super_block *sb)
static int __init cgroup_writeback_init(void)
{
- isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+ isw_wq = alloc_workqueue("inode_switch_wbs", WQ_PERCPU, 0);
if (!isw_wq)
return -ENOMEM;
return 0;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 0727f60ad028..9d65719353fa 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -151,7 +151,8 @@ static int __init init_gfs2_fs(void)
error = -ENOMEM;
gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
- WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU,
+ 0);
if (!gfs2_recovery_wq)
goto fail_wq1;
@@ -160,7 +161,7 @@ static int __init init_gfs2_fs(void)
if (!gfs2_control_wq)
goto fail_wq2;
- gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", 0, 0);
+ gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", WQ_PERCPU, 0);
if (!gfs2_freeze_wq)
goto fail_wq3;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e83d293c3614..0dccb5882ef6 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1189,13 +1189,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
error = -ENOMEM;
sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s",
- WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0,
+ WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE | WQ_PERCPU,
+ 0,
sdp->sd_fsname);
if (!sdp->sd_glock_wq)
goto fail_free;
sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
- WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU, 0,
+ sdp->sd_fsname);
if (!sdp->sd_delete_wq)
goto fail_glock_wq;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 2018501b2249..2347a50f079b 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1876,7 +1876,8 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
dlm_debug_init(dlm);
snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
- dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
+ dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!dlm->dlm_worker) {
status = -ENOMEM;
mlog_errno(status);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 5130ec44e5e1..0b730535b2c8 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -595,7 +595,8 @@ static int __init init_dlmfs_fs(void)
}
cleanup_inode = 1;
- user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
+ user_dlm_worker = alloc_workqueue("user_dlm",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!user_dlm_worker) {
status = -ENOMEM;
goto bail;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index a08c42363ffc..3d3a76fa7210 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1883,7 +1883,9 @@ init_cifs(void)
cifs_dbg(VFS, "dir_cache_timeout set to max of 65000 seconds\n");
}
- cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ cifsiod_wq = alloc_workqueue("cifsiod",
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!cifsiod_wq) {
rc = -ENOMEM;
goto out_clean_proc;
@@ -1911,28 +1913,32 @@ init_cifs(void)
}
cifsoplockd_wq = alloc_workqueue("cifsoplockd",
- WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!cifsoplockd_wq) {
rc = -ENOMEM;
goto out_destroy_fileinfo_put_wq;
}
deferredclose_wq = alloc_workqueue("deferredclose",
- WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!deferredclose_wq) {
rc = -ENOMEM;
goto out_destroy_cifsoplockd_wq;
}
serverclose_wq = alloc_workqueue("serverclose",
- WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!serverclose_wq) {
rc = -ENOMEM;
goto out_destroy_deferredclose_wq;
}
cfid_put_wq = alloc_workqueue("cfid_put_wq",
- WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!cfid_put_wq) {
rc = -ENOMEM;
goto out_destroy_serverclose_wq;
diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index 72b00ca6e455..4a71f46d7020 100644
--- a/fs/smb/server/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
@@ -78,7 +78,7 @@ int ksmbd_work_pool_init(void)
int ksmbd_workqueue_init(void)
{
- ksmbd_wq = alloc_workqueue("ksmbd-io", 0, 0);
+ ksmbd_wq = alloc_workqueue("ksmbd-io", WQ_PERCPU, 0);
if (!ksmbd_wq)
return -ENOMEM;
return 0;
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 4998df04ab95..43b7062335fa 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -2198,7 +2198,8 @@ int ksmbd_rdma_init(void)
* for lack of credits
*/
smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
- WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
+ WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!smb_direct_wq)
return -ENOMEM;
diff --git a/fs/super.c b/fs/super.c
index 97a17f9d9023..0a9af48f30dd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -2174,7 +2174,8 @@ int sb_init_dio_done_wq(struct super_block *sb)
{
struct workqueue_struct *old;
struct workqueue_struct *wq = alloc_workqueue("dio/%s",
- WQ_MEM_RECLAIM, 0,
+ WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0,
sb->s_id);
if (!wq)
return -ENOMEM;
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index 4fcad0825a12..b8f53d1cfd20 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -357,7 +357,7 @@ void __init fsverity_init_workqueue(void)
* latency on ARM64.
*/
fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
- WQ_HIGHPRI,
+ WQ_HIGHPRI | WQ_PERCPU,
num_online_cpus());
if (!fsverity_read_workqueue)
panic("failed to allocate fsverity_read_queue");
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6493bdb57351..3fecb066eeb3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1489,8 +1489,7 @@ xlog_alloc_log(
log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
- WQ_HIGHPRI),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_PERCPU),
0, mp->m_super->s_id);
if (!log->l_ioend_workqueue)
goto out_free_iclog;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index d0f5b403bdbe..152032f68013 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -293,7 +293,8 @@ int
xfs_mru_cache_init(void)
{
xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
- XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 1);
+ XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+ 1);
if (!xfs_mru_reap_wq)
return -ENOMEM;
return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index b2dd0c0bf509..38584c5618f4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -565,19 +565,19 @@ xfs_init_mount_workqueues(
struct xfs_mount *mp)
{
mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
1, mp->m_super->s_id);
if (!mp->m_buf_workqueue)
goto out;
mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
0, mp->m_super->s_id);
if (!mp->m_unwritten_workqueue)
goto out_destroy_buf;
mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
0, mp->m_super->s_id);
if (!mp->m_reclaim_workqueue)
goto out_destroy_unwritten;
@@ -589,13 +589,14 @@ xfs_init_mount_workqueues(
goto out_destroy_reclaim;
mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
- XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
1, mp->m_super->s_id);
if (!mp->m_inodegc_wq)
goto out_destroy_blockgc;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
- XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0,
+ mp->m_super->s_id);
if (!mp->m_sync_workqueue)
goto out_destroy_inodegc;
@@ -2499,8 +2500,8 @@ xfs_init_workqueues(void)
* AGs in all the filesystems mounted. Hence use the default large
* max_active value for this workqueue.
*/
- xfs_alloc_wq = alloc_workqueue("xfsalloc",
- XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
+ xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+ 0);
if (!xfs_alloc_wq)
return -ENOMEM;
--
2.49.0
Powered by blists - more mailing lists