[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240819053605.11706-9-neilb@suse.de>
Date: Mon, 19 Aug 2024 15:20:42 +1000
From: NeilBrown <neilb@...e.de>
To: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Linus Torvalds <torvalds@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org
Subject: [PATCH 8/9] Improve and extend wake_up_var() interface.
wake_up_var() is similar to wake_up_bit() and benefits from a richer
interface which includes barriers.
- wake_up_var() now has smp_mb__after_atomic() and should be used after
the variable is changed atomically - including inside a locked region.
- wake_up_var_mb() now has smb_mb() and should be used after the variable
has been changed non-atomically
- wake_up_var_relaxed() can be used when no barrier is needed, such as
after atomic_dec_and_test.
Signed-off-by: NeilBrown <neilb@...e.de>
---
block/bdev.c | 3 +-
drivers/block/pktcdvd.c | 3 +-
fs/afs/cell.c | 16 +++++------
fs/netfs/fscache_cache.c | 2 +-
fs/netfs/fscache_cookie.c | 6 ----
fs/netfs/fscache_volume.c | 2 +-
fs/netfs/io.c | 4 +--
fs/nfs/dir.c | 3 +-
fs/nfs/nfs4state.c | 2 +-
fs/nfsd/nfs4state.c | 2 +-
fs/notify/mark.c | 2 +-
fs/super.c | 20 +++++---------
fs/xfs/xfs_buf.c | 2 +-
include/linux/mbcache.h | 2 +-
include/linux/wait_bit.h | 58 +++++++++++++++++++++++++++++++++++++--
kernel/events/core.c | 6 ++--
kernel/sched/core.c | 2 +-
kernel/sched/wait_bit.c | 21 ++++++++++++--
kernel/softirq.c | 3 +-
mm/memremap.c | 2 +-
security/landlock/fs.c | 2 +-
21 files changed, 108 insertions(+), 55 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index d804c91c651b..34ee3e155c18 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -561,8 +561,7 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
/* tell others that we're done */
BUG_ON(whole->bd_claiming != holder);
whole->bd_claiming = NULL;
- smp_mb();
- wake_up_var(&whole->bd_claiming);
+ wake_up_var_mb(&whole->bd_claiming);
}
/**
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 273fbe05d80f..e774057329c6 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1210,8 +1210,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
if (pd->congested &&
pd->bio_queue_size <= pd->write_congestion_off) {
pd->congested = false;
- smp_mb();
- wake_up_var(&pd->congested);
+ wake_up_var_mb(&pd->congested);
}
spin_unlock(&pd->lock);
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 422bcc26becc..726bf48094ce 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -478,7 +478,7 @@ static int afs_update_cell(struct afs_cell *cell)
out_wake:
smp_store_release(&cell->dns_lookup_count,
cell->dns_lookup_count + 1); /* vs source/status */
- wake_up_var(&cell->dns_lookup_count);
+ wake_up_var_mb(&cell->dns_lookup_count);
_leave(" = %d", ret);
return ret;
}
@@ -748,12 +748,12 @@ static void afs_manage_cell(struct afs_cell *cell)
if (cell->state == AFS_CELL_FAILED)
goto done;
smp_store_release(&cell->state, AFS_CELL_UNSET);
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
goto again;
case AFS_CELL_UNSET:
smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
goto again;
case AFS_CELL_ACTIVATING:
@@ -762,7 +762,7 @@ static void afs_manage_cell(struct afs_cell *cell)
goto activation_failed;
smp_store_release(&cell->state, AFS_CELL_ACTIVE);
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
goto again;
case AFS_CELL_ACTIVE:
@@ -775,7 +775,7 @@ static void afs_manage_cell(struct afs_cell *cell)
goto done;
}
smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
goto again;
case AFS_CELL_DEACTIVATING:
@@ -783,7 +783,7 @@ static void afs_manage_cell(struct afs_cell *cell)
goto reverse_deactivation;
afs_deactivate_cell(net, cell);
smp_store_release(&cell->state, AFS_CELL_INACTIVE);
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
goto again;
case AFS_CELL_REMOVED:
@@ -800,12 +800,12 @@ static void afs_manage_cell(struct afs_cell *cell)
afs_deactivate_cell(net, cell);
smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
goto again;
reverse_deactivation:
smp_store_release(&cell->state, AFS_CELL_ACTIVE);
- wake_up_var(&cell->state);
+ wake_up_var_mb(&cell->state);
_leave(" [deact->act]");
return;
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 9397ed39b0b4..83e6d25a5e0a 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -321,7 +321,7 @@ void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_t
trace_fscache_access_cache(cache->debug_id, refcount_read(&cache->ref),
n_accesses, why);
if (n_accesses == 0)
- wake_up_var(&cache->n_accesses);
+ wake_up_var_relaxed(&cache->n_accesses);
}
/**
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..93c66938b164 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -191,12 +191,6 @@ bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
static inline void wake_up_cookie_state(struct fscache_cookie *cookie)
{
- /* Use a barrier to ensure that waiters see the state variable
- * change, as spin_unlock doesn't guarantee a barrier.
- *
- * See comments over wake_up_bit() and waitqueue_active().
- */
- smp_mb();
wake_up_var(&cookie->state);
}
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cb75c07b5281..c6c43a87f56e 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -128,7 +128,7 @@ void fscache_end_volume_access(struct fscache_volume *volume,
refcount_read(&volume->ref),
n_accesses, why);
if (n_accesses == 0)
- wake_up_var(&volume->n_accesses);
+ wake_up_var_relaxed(&volume->n_accesses);
}
EXPORT_SYMBOL(fscache_end_volume_access);
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..ebae3cfcad20 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -181,7 +181,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
if (atomic_dec_and_test(&rreq->nr_outstanding))
return true;
- wake_up_var(&rreq->nr_outstanding);
+ wake_up_var_relaxed(&rreq->nr_outstanding);
return false;
}
@@ -372,7 +372,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
if (u == 0)
netfs_rreq_terminated(rreq, was_async);
else if (u == 1)
- wake_up_var(&rreq->nr_outstanding);
+ wake_up_var_relaxed(&rreq->nr_outstanding);
netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
return;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4cb97ef41350..1d745f105095 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1837,9 +1837,8 @@ static void block_revalidate(struct dentry *dentry)
static void unblock_revalidate(struct dentry *dentry)
{
- /* store_release ensures wait_var_event() sees the update */
smp_store_release(&dentry->d_fsdata, NULL);
- wake_up_var(&dentry->d_fsdata);
+ wake_up_var_mb(&dentry->d_fsdata);
}
/*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 877f682b45f2..d038409a9a9a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1220,7 +1220,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
&clp->cl_state);
if (!swapon) {
- wake_up_var(&clp->cl_state);
+ wake_up_var_relaxed(&clp->cl_state);
return;
}
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a20c2c9d7d45..d156ac7637cf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7478,8 +7478,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto put_stateid;
trace_nfsd_deleg_return(stateid);
- wake_up_var(d_inode(cstate->current_fh.fh_dentry));
destroy_delegation(dp);
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
put_stateid:
nfs4_put_stid(&dp->dl_stid);
out:
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 5e170e713088..ff3d84e9db4d 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -139,7 +139,7 @@ static void fsnotify_get_sb_watched_objects(struct super_block *sb)
static void fsnotify_put_sb_watched_objects(struct super_block *sb)
{
if (atomic_long_dec_and_test(fsnotify_sb_watched_objects(sb)))
- wake_up_var(fsnotify_sb_watched_objects(sb));
+ wake_up_var_relaxed(fsnotify_sb_watched_objects(sb));
}
static void fsnotify_get_inode_ref(struct inode *inode)
diff --git a/fs/super.c b/fs/super.c
index 38d72a3cf6fc..96b9a682a7ca 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -158,13 +158,7 @@ static void super_wake(struct super_block *sb, unsigned int flag)
* seeing SB_BORN sent.
*/
smp_store_release(&sb->s_flags, sb->s_flags | flag);
- /*
- * Pairs with the barrier in prepare_to_wait_event() to make sure
- * ___wait_var_event() either sees SB_BORN set or
- * waitqueue_active() check in wake_up_var() sees the waiter.
- */
- smp_mb();
- wake_up_var(&sb->s_flags);
+ wake_up_var_mb(&sb->s_flags);
}
/*
@@ -2074,7 +2068,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
/* Nothing to do really... */
WARN_ON_ONCE(freeze_inc(sb, who) > 1);
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
- wake_up_var(&sb->s_writers.frozen);
+ wake_up_var_mb(&sb->s_writers.frozen);
super_unlock_excl(sb);
return 0;
}
@@ -2094,7 +2088,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
if (ret) {
sb->s_writers.frozen = SB_UNFROZEN;
sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
- wake_up_var(&sb->s_writers.frozen);
+ wake_up_var_mb(&sb->s_writers.frozen);
deactivate_locked_super(sb);
return ret;
}
@@ -2110,7 +2104,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
"VFS:Filesystem freeze failed\n");
sb->s_writers.frozen = SB_UNFROZEN;
sb_freeze_unlock(sb, SB_FREEZE_FS);
- wake_up_var(&sb->s_writers.frozen);
+ wake_up_var_mb(&sb->s_writers.frozen);
deactivate_locked_super(sb);
return ret;
}
@@ -2121,7 +2115,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
*/
WARN_ON_ONCE(freeze_inc(sb, who) > 1);
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
- wake_up_var(&sb->s_writers.frozen);
+ wake_up_var_mb(&sb->s_writers.frozen);
lockdep_sb_freeze_release(sb);
super_unlock_excl(sb);
return 0;
@@ -2150,7 +2144,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
if (sb_rdonly(sb)) {
sb->s_writers.frozen = SB_UNFROZEN;
- wake_up_var(&sb->s_writers.frozen);
+ wake_up_var_mb(&sb->s_writers.frozen);
goto out_deactivate;
}
@@ -2167,7 +2161,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
}
sb->s_writers.frozen = SB_UNFROZEN;
- wake_up_var(&sb->s_writers.frozen);
+ wake_up_var_mb(&sb->s_writers.frozen);
sb_freeze_unlock(sb, SB_FREEZE_FS);
out_deactivate:
deactivate_locked_super(sb);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa4dbda7b536..84355a859c86 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2137,7 +2137,7 @@ xfs_buf_list_del(
struct xfs_buf *bp)
{
list_del_init(&bp->b_list);
- wake_up_var(&bp->b_list);
+ wake_up_var_mb(&bp->b_list);
}
/*
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 97e64184767d..65cc6bc1baa6 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -52,7 +52,7 @@ static inline void mb_cache_entry_put(struct mb_cache *cache,
if (cnt > 0) {
if (cnt <= 2)
- wake_up_var(&entry->e_refcnt);
+ wake_up_var_relaxed(&entry->e_refcnt);
return;
}
__mb_cache_entry_free(cache, entry);
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 609c10fcd344..7cdd0ab34f21 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -236,7 +236,7 @@ wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
}
extern void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int flags);
-extern void wake_up_var(void *var);
+extern void wake_up_var_relaxed(void *var);
extern wait_queue_head_t *__var_waitqueue(void *p);
#define ___wait_var_event(var, condition, state, exclusive, ret, cmd) \
@@ -375,12 +375,66 @@ static inline void clear_and_wake_up_bit(int bit, void *word)
wake_up_bit(word, bit);
}
+/**
+ * atomic_dec_and_wake_up_var - decrement a counts a wake any waiting on it.
+ *
+ * @var: the atomic_t variable being waited on.
+ *
+ * @var is decremented and if the value reaches zero, any code waiting
+ * in wake_var_event() for the variable will be woken.
+ */
static inline bool atomic_dec_and_wake_up_var(atomic_t *var)
{
if (!atomic_dec_and_test(var))
return false;
- wake_up_var(var);
+ wake_up_var_relaxed(var);
return true;
}
+/**
+ * wake_up_var_mb - wake up all waiters on a var
+ * @var: the address being waited on, a kernel virtual address
+ *
+ * There is a standard hashed waitqueue table for generic use. This is
+ * the part of the hash-table's accessor API that wakes up waiters on an
+ * address. For instance, if one were to have waiters on an address one
+ * would call wake_up_var_mb() after non-atomically modifying the
+ * variable
+ *
+ * This interface has a full barrier and so is safe to use anywhere.
+ * It is particular intended for use after the variable has been updated
+ * non-atmomically with simple assignment. Where the variable is
+ * is updated atomically, the barrier used may be excessively costly.
+ *
+ * Note that it is often appropriate to use smp_store_release() to
+ * update the field in a structure that is being waited on. This ensures
+ * dependant fields which have previously been set will have the new value
+ * visible by the time the update to the waited-on field is visible.
+ */
+static inline void wake_up_var_mb(void *var)
+{
+ smp_mb();
+ wake_up_var_relaxed(var);
+}
+
+/**
+ * wake_up_var - wake up all waiters on a var
+ * @var: the address being waited on, a kernel virtual address
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hash-table's accessor API that wakes up waiters
+ * on a variable. Waiting in wait_var_event() can be worken by this.
+ *
+ * This interface should only be used after the variable has been updated
+ * atomically such as in a locked region which has been unlocked, or by
+ * atomic_set() or xchg() etc.
+ * If the variable has been updated non-atomically then wake_up_var_mb()
+ * should be used.
+ */
+static inline void wake_up_var(void *var)
+{
+ smp_mb__after_atomic();
+ wake_up_var_relaxed(var);
+}
+
#endif /* _LINUX_WAIT_BIT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index aa3450bdc227..e00be2c7dae7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5451,8 +5451,7 @@ int perf_event_release_kernel(struct perf_event *event)
* ctx while the child_mutex got released above, make sure to
* notify about the preceding put_ctx().
*/
- smp_mb(); /* pairs with wait_var_event() */
- wake_up_var(var);
+ wake_up_var_mb(var);
}
goto again;
}
@@ -5468,8 +5467,7 @@ int perf_event_release_kernel(struct perf_event *event)
* Wake any perf_event_free_task() waiting for this event to be
* freed.
*/
- smp_mb(); /* pairs with wait_var_event() */
- wake_up_var(var);
+ wake_up_var_mb(var);
}
no_ctx:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3951e4a55e5..e4054f19bb25 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2898,7 +2898,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
wait_for_completion(&pending->done);
if (refcount_dec_and_test(&pending->refs))
- wake_up_var(&pending->refs); /* No UaF, just an address */
+ wake_up_var_relaxed(&pending->refs); /* No UaF, just an address */
/*
* Block the original owner of &pending until all subsequent callers
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index 51d923bf207e..970a7874d785 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -187,11 +187,28 @@ void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int
}
EXPORT_SYMBOL(init_wait_var_entry);
-void wake_up_var(void *var)
+/**
+ * wake_up_var_relaxed - wake up all waiters on an address
+ * @word: the address being waited on, a kernel virtual address
+ *
+ * There is a standard hashed waitqueue table for generic use. This is
+ * the part of the hash-table's accessor API that wakes up waiters on an
+ * address. For instance, if one were to have waiters on an address one
+ * would call wake_up_bit() after updating a value at, or near, the
+ * address.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically this will be provided implicitly by an atomic function
+ * that returns value such as atomic_dec_return or test_and_clear_bit().
+ * If the bit is cleared without full ordering, an alternate interface
+ * such as wake_up_bit_sync() or wake_up_bit() should be used.
+ */
+void wake_up_var_relaxed(void *var)
{
__wake_up_bit(__var_waitqueue(var), var, -1);
}
-EXPORT_SYMBOL(wake_up_var);
+EXPORT_SYMBOL(wake_up_var_relaxed);
__sched int bit_wait(struct wait_bit_key *word, int mode)
{
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 02582017759a..a1f3bc234848 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -749,7 +749,7 @@ EXPORT_SYMBOL(__tasklet_hi_schedule);
static bool tasklet_clear_sched(struct tasklet_struct *t)
{
if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
- wake_up_var(&t->state);
+ wake_up_var_relaxed(&t->state);
return true;
}
@@ -885,7 +885,6 @@ void tasklet_unlock(struct tasklet_struct *t)
{
smp_mb__before_atomic();
clear_bit(TASKLET_STATE_RUN, &t->state);
- smp_mb__after_atomic();
wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);
diff --git a/mm/memremap.c b/mm/memremap.c
index 40d4547ce514..c04c16230551 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -523,7 +523,7 @@ bool __put_devmap_managed_folio_refs(struct folio *folio, int refs)
* stable because nobody holds a reference on the page.
*/
if (folio_ref_sub_return(folio, refs) == 1)
- wake_up_var(&folio->_refcount);
+ wake_up_var_relaxed(&folio->_refcount);
return true;
}
EXPORT_SYMBOL(__put_devmap_managed_folio_refs);
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0c971d64604e 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -82,7 +82,7 @@ static void release_inode(struct landlock_object *const object)
iput(inode);
if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
- wake_up_var(&landlock_superblock(sb)->inode_refs);
+ wake_up_var_relaxed(&landlock_superblock(sb)->inode_refs);
}
static const struct landlock_object_underops landlock_fs_underops = {
--
2.44.0
Powered by blists - more mailing lists