lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date: Wed, 21 Feb 2024 14:31:51 +0100
From: Jan Kara <jack@...e.cz>
To: Chuck Lever <cel@...nel.org>
Cc: viro@...iv.linux.org.uk, brauner@...nel.org, jack@...e.cz,
	hughd@...gle.com, akpm@...ux-foundation.org,
	Liam.Howlett@...cle.com, oliver.sang@...el.com, feng.tang@...el.com,
	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	maple-tree@...ts.infradead.org, linux-mm@...ck.org, lkp@...el.com
Subject: Re: [PATCH v2 6/6] libfs: Convert simple directory offsets to use a
 Maple Tree

On Sat 17-02-24 15:24:16, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@...cle.com>
> 
> Test robot reports:
> > kernel test robot noticed a -19.0% regression of aim9.disk_src.ops_per_sec on:
> >
> > commit: a2e459555c5f9da3e619b7e47a63f98574dc75f1 ("shmem: stable directory offsets")
> > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master
> 
> Feng Tang further clarifies that:
> > ... the new simple_offset_add()
> > called by shmem_mknod() brings extra cost related with slab,
> > specifically the 'radix_tree_node', which cause the regression.
> 
> Willy's analysis is that, over time, the test workload causes
> xa_alloc_cyclic() to fragment the underlying SLAB cache.
> 
> This patch replaces the offset_ctx's xarray with a Maple Tree in the
> hope that Maple Tree's dense node mode will handle this scenario
> more scalably.
> 
> In addition, we can widen the simple directory offset maximum to
> signed long (as loff_t is also signed).
> 
> Suggested-by: Matthew Wilcox <willy@...radead.org>
> Reported-by: kernel test robot <oliver.sang@...el.com>
> Closes: https://lore.kernel.org/oe-lkp/202309081306.3ecb3734-oliver.sang@intel.com
> Signed-off-by: Chuck Lever <chuck.lever@...cle.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@...e.cz>

								Honza

> ---
>  fs/libfs.c         |   47 +++++++++++++++++++++++------------------------
>  include/linux/fs.h |    5 +++--
>  2 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/libfs.c b/fs/libfs.c
> index f7f92a49a418..d3d31197c8e4 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -245,17 +245,17 @@ enum {
>  	DIR_OFFSET_MIN	= 2,
>  };
>  
> -static void offset_set(struct dentry *dentry, u32 offset)
> +static void offset_set(struct dentry *dentry, long offset)
>  {
> -	dentry->d_fsdata = (void *)((uintptr_t)(offset));
> +	dentry->d_fsdata = (void *)offset;
>  }
>  
> -static u32 dentry2offset(struct dentry *dentry)
> +static long dentry2offset(struct dentry *dentry)
>  {
> -	return (u32)((uintptr_t)(dentry->d_fsdata));
> +	return (long)dentry->d_fsdata;
>  }
>  
> -static struct lock_class_key simple_offset_xa_lock;
> +static struct lock_class_key simple_offset_lock_class;
>  
>  /**
>   * simple_offset_init - initialize an offset_ctx
> @@ -264,8 +264,8 @@ static struct lock_class_key simple_offset_xa_lock;
>   */
>  void simple_offset_init(struct offset_ctx *octx)
>  {
> -	xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
> -	lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
> +	mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE);
> +	lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class);
>  	octx->next_offset = DIR_OFFSET_MIN;
>  }
>  
> @@ -274,20 +274,19 @@ void simple_offset_init(struct offset_ctx *octx)
>   * @octx: directory offset ctx to be updated
>   * @dentry: new dentry being added
>   *
> - * Returns zero on success. @so_ctx and the dentry offset are updated.
> + * Returns zero on success. @octx and the dentry's offset are updated.
>   * Otherwise, a negative errno value is returned.
>   */
>  int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
>  {
> -	static const struct xa_limit limit = XA_LIMIT(DIR_OFFSET_MIN, U32_MAX);
> -	u32 offset;
> +	unsigned long offset;
>  	int ret;
>  
>  	if (dentry2offset(dentry) != 0)
>  		return -EBUSY;
>  
> -	ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
> -			      &octx->next_offset, GFP_KERNEL);
> +	ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
> +				 LONG_MAX, &octx->next_offset, GFP_KERNEL);
>  	if (ret < 0)
>  		return ret;
>  
> @@ -303,13 +302,13 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
>   */
>  void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
>  {
> -	u32 offset;
> +	long offset;
>  
>  	offset = dentry2offset(dentry);
>  	if (offset == 0)
>  		return;
>  
> -	xa_erase(&octx->xa, offset);
> +	mtree_erase(&octx->mt, offset);
>  	offset_set(dentry, 0);
>  }
>  
> @@ -332,7 +331,7 @@ int simple_offset_empty(struct dentry *dentry)
>  
>  	index = DIR_OFFSET_MIN;
>  	octx = inode->i_op->get_offset_ctx(inode);
> -	xa_for_each(&octx->xa, index, child) {
> +	mt_for_each(&octx->mt, child, index, LONG_MAX) {
>  		spin_lock(&child->d_lock);
>  		if (simple_positive(child)) {
>  			spin_unlock(&child->d_lock);
> @@ -362,8 +361,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
>  {
>  	struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
>  	struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
> -	u32 old_index = dentry2offset(old_dentry);
> -	u32 new_index = dentry2offset(new_dentry);
> +	long old_index = dentry2offset(old_dentry);
> +	long new_index = dentry2offset(new_dentry);
>  	int ret;
>  
>  	simple_offset_remove(old_ctx, old_dentry);
> @@ -389,9 +388,9 @@ int simple_offset_rename_exchange(struct inode *old_dir,
>  
>  out_restore:
>  	offset_set(old_dentry, old_index);
> -	xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
> +	mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL);
>  	offset_set(new_dentry, new_index);
> -	xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
> +	mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
>  	return ret;
>  }
>  
> @@ -404,7 +403,7 @@ int simple_offset_rename_exchange(struct inode *old_dir,
>   */
>  void simple_offset_destroy(struct offset_ctx *octx)
>  {
> -	xa_destroy(&octx->xa);
> +	mtree_destroy(&octx->mt);
>  }
>  
>  /**
> @@ -434,16 +433,16 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
>  
>  	/* In this case, ->private_data is protected by f_pos_lock */
>  	file->private_data = NULL;
> -	return vfs_setpos(file, offset, U32_MAX);
> +	return vfs_setpos(file, offset, LONG_MAX);
>  }
>  
>  static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
>  {
> +	MA_STATE(mas, &octx->mt, offset, offset);
>  	struct dentry *child, *found = NULL;
> -	XA_STATE(xas, &octx->xa, offset);
>  
>  	rcu_read_lock();
> -	child = xas_next_entry(&xas, U32_MAX);
> +	child = mas_find(&mas, LONG_MAX);
>  	if (!child)
>  		goto out;
>  	spin_lock(&child->d_lock);
> @@ -457,8 +456,8 @@ static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
>  
>  static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
>  {
> -	u32 offset = dentry2offset(dentry);
>  	struct inode *inode = d_inode(dentry);
> +	long offset = dentry2offset(dentry);
>  
>  	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
>  			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 03d141809a2c..55144c12ee0f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -43,6 +43,7 @@
>  #include <linux/cred.h>
>  #include <linux/mnt_idmapping.h>
>  #include <linux/slab.h>
> +#include <linux/maple_tree.h>
>  
>  #include <asm/byteorder.h>
>  #include <uapi/linux/fs.h>
> @@ -3260,8 +3261,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
>  		const void __user *from, size_t count);
>  
>  struct offset_ctx {
> -	struct xarray		xa;
> -	u32			next_offset;
> +	struct maple_tree	mt;
> +	unsigned long		next_offset;
>  };
>  
>  void simple_offset_init(struct offset_ctx *octx);
> 
> 
-- 
Jan Kara <jack@...e.com>
SUSE Labs, CR

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ