lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241219173615.GL6174@frogsfrogsfrogs>
Date: Thu, 19 Dec 2024 09:36:15 -0800
From: "Darrick J. Wong" <djwong@...nel.org>
To: Jinliang Zheng <alexjlzheng@...il.com>
Cc: chandan.babu@...cle.com, linux-xfs@...r.kernel.org,
	linux-kernel@...r.kernel.org, flyingpeng@...cent.com,
	Jinliang Zheng <alexjlzheng@...cent.com>
Subject: Re: [PATCH] xfs: using mutex instead of semaphore for xfs_buf_lock()

On Fri, Dec 20, 2024 at 01:16:29AM +0800, Jinliang Zheng wrote:
> xfs_buf uses a semaphore for mutual exclusion, and its count value
> is initialized to 1, which is equivalent to a mutex.
> 
> However, mutex->owner can provide more information when analyzing
> vmcore, making it easier for us to identify which task currently
> holds the lock.

Does XFS pass buffers between tasks?  xfs_btree_split has that whole
blob of ugly code where it can pass a locked inode and transaction to a
workqueue function to avoid overrunning the kernel stack.

--D

> Signed-off-by: Jinliang Zheng <alexjlzheng@...cent.com>
> ---
>  fs/xfs/xfs_buf.c   |  9 +++++----
>  fs/xfs/xfs_buf.h   |  4 ++--
>  fs/xfs/xfs_trace.h | 25 +++++--------------------
>  3 files changed, 12 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index aa4dbda7b536..7c59d7905ea1 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -243,7 +243,8 @@ _xfs_buf_alloc(
>  	INIT_LIST_HEAD(&bp->b_lru);
>  	INIT_LIST_HEAD(&bp->b_list);
>  	INIT_LIST_HEAD(&bp->b_li_list);
> -	sema_init(&bp->b_sema, 0); /* held, no waiters */
> +	mutex_init(&bp->b_mutex);
> +	mutex_lock(&bp->b_mutex); /* held, no waiters */
>  	spin_lock_init(&bp->b_lock);
>  	bp->b_target = target;
>  	bp->b_mount = target->bt_mount;
> @@ -1168,7 +1169,7 @@ xfs_buf_trylock(
>  {
>  	int			locked;
>  
> -	locked = down_trylock(&bp->b_sema) == 0;
> +	locked = mutex_trylock(&bp->b_mutex);
>  	if (locked)
>  		trace_xfs_buf_trylock(bp, _RET_IP_);
>  	else
> @@ -1193,7 +1194,7 @@ xfs_buf_lock(
>  
>  	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
>  		xfs_log_force(bp->b_mount, 0);
> -	down(&bp->b_sema);
> +	mutex_lock(&bp->b_mutex);
>  
>  	trace_xfs_buf_lock_done(bp, _RET_IP_);
>  }
> @@ -1204,7 +1205,7 @@ xfs_buf_unlock(
>  {
>  	ASSERT(xfs_buf_islocked(bp));
>  
> -	up(&bp->b_sema);
> +	mutex_unlock(&bp->b_mutex);
>  	trace_xfs_buf_unlock(bp, _RET_IP_);
>  }
>  
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index b1580644501f..2c48e388d451 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -171,7 +171,7 @@ struct xfs_buf {
>  	atomic_t		b_hold;		/* reference count */
>  	atomic_t		b_lru_ref;	/* lru reclaim ref count */
>  	xfs_buf_flags_t		b_flags;	/* status flags */
> -	struct semaphore	b_sema;		/* semaphore for lockables */
> +	struct mutex		b_mutex;	/* mutex for lockables */
>  
>  	/*
>  	 * concurrent access to b_lru and b_lru_flags are protected by
> @@ -304,7 +304,7 @@ extern int xfs_buf_trylock(struct xfs_buf *);
>  extern void xfs_buf_lock(struct xfs_buf *);
>  extern void xfs_buf_unlock(struct xfs_buf *);
>  #define xfs_buf_islocked(bp) \
> -	((bp)->b_sema.count <= 0)
> +	mutex_is_locked(&(bp)->b_mutex)
>  
>  static inline void xfs_buf_relse(struct xfs_buf *bp)
>  {
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 180ce697305a..ba6c003b82af 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -443,7 +443,6 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
>  		__field(int, nblks)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned, lockval)
>  		__field(unsigned, flags)
>  		__field(unsigned long, caller_ip)
>  		__field(const void *, buf_ops)
> @@ -454,19 +453,17 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
>  		__entry->nblks = bp->b_length;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->flags = bp->b_flags;
>  		__entry->caller_ip = caller_ip;
>  		__entry->buf_ops = bp->b_ops;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d flags %s bufops %pS caller %pS",
> +		  "flags %s bufops %pS caller %pS",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->bno,
>  		  __entry->nblks,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
>  		  __entry->buf_ops,
>  		  (void *)__entry->caller_ip)
> @@ -514,7 +511,6 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
>  		__field(unsigned int, length)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned, lockval)
>  		__field(unsigned, flags)
>  		__field(unsigned long, caller_ip)
>  	),
> @@ -525,17 +521,15 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
>  		__entry->flags = flags;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->caller_ip = caller_ip;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d flags %s caller %pS",
> +		  "flags %s caller %pS",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->bno,
>  		  __entry->length,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
>  		  (void *)__entry->caller_ip)
>  )
> @@ -558,7 +552,6 @@ TRACE_EVENT(xfs_buf_ioerror,
>  		__field(unsigned, flags)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned, lockval)
>  		__field(int, error)
>  		__field(xfs_failaddr_t, caller_ip)
>  	),
> @@ -568,19 +561,17 @@ TRACE_EVENT(xfs_buf_ioerror,
>  		__entry->length = bp->b_length;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->error = error;
>  		__entry->flags = bp->b_flags;
>  		__entry->caller_ip = caller_ip;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d error %d flags %s caller %pS",
> +		  "error %d flags %s caller %pS",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->bno,
>  		  __entry->length,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __entry->error,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
>  		  (void *)__entry->caller_ip)
> @@ -595,7 +586,6 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
>  		__field(unsigned int, buf_len)
>  		__field(int, buf_hold)
>  		__field(int, buf_pincount)
> -		__field(int, buf_lockval)
>  		__field(unsigned, buf_flags)
>  		__field(unsigned, bli_recur)
>  		__field(int, bli_refcount)
> @@ -612,18 +602,16 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
>  		__entry->buf_flags = bip->bli_buf->b_flags;
>  		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
>  		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
> -		__entry->buf_lockval = bip->bli_buf->b_sema.count;
>  		__entry->li_flags = bip->bli_item.li_flags;
>  	),
>  	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
> -		  "lock %d flags %s recur %d refcount %d bliflags %s "
> +		  "flags %s recur %d refcount %d bliflags %s "
>  		  "liflags %s",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long long)__entry->buf_bno,
>  		  __entry->buf_len,
>  		  __entry->buf_hold,
>  		  __entry->buf_pincount,
> -		  __entry->buf_lockval,
>  		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
>  		  __entry->bli_recur,
>  		  __entry->bli_refcount,
> @@ -4802,7 +4790,6 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
>  		__field(int, nblks)
>  		__field(int, hold)
>  		__field(int, pincount)
> -		__field(unsigned int, lockval)
>  		__field(unsigned int, flags)
>  	),
>  	TP_fast_assign(
> @@ -4811,16 +4798,14 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
>  		__entry->nblks = bp->b_length;
>  		__entry->hold = atomic_read(&bp->b_hold);
>  		__entry->pincount = atomic_read(&bp->b_pin_count);
> -		__entry->lockval = bp->b_sema.count;
>  		__entry->flags = bp->b_flags;
>  	),
> -	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
> +	TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d flags %s",
>  		  __entry->xfino,
>  		  (unsigned long long)__entry->bno,
>  		  __entry->nblks,
>  		  __entry->hold,
>  		  __entry->pincount,
> -		  __entry->lockval,
>  		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
>  )
>  
> -- 
> 2.41.1
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ