linux-kernel - Re: [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20211208205830.GH6385@nvidia.com>
Date:   Wed, 8 Dec 2021 16:58:30 -0400
From:   Jason Gunthorpe <jgg@...dia.com>
To:     Leon Romanovsky <leon@...nel.org>
Cc:     Aharon Landau <aharonl@...dia.com>, linux-kernel@...r.kernel.org,
        linux-rdma@...r.kernel.org
Subject: Re: [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to
 an RB-tree

On Mon, Dec 06, 2021 at 11:10:49AM +0200, Leon Romanovsky wrote:

> +static struct mlx5_cache_ent *ent_search(struct mlx5_mr_cache *cache, void *mkc)

mlx5_cache_ent_find() ?

This search isn't really what I would expect, it should stop if it
finds a (somewhat) larger and otherwise compatible entry instead of
continuing to bin on power of two sizes.

> +	struct rb_node *node = cache->cache_root.rb_node;
> +	int size = MLX5_ST_SZ_BYTES(mkc);

size_t not int

> +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +	if (dev->cache.maintained_cache && !force) {
> +		int order;

unsigned int

> +		/*
> +		 * Try to get an mkey from pool.
> +		 */
> +		order = order_base_2(ndescs) > 2 ? order_base_2(ndescs) : 2;
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, 1 << order));
> +		mutex_lock(&dev->cache.cache_lock);
> +		ent = ent_search(&dev->cache, mkc);
> +		mutex_unlock(&dev->cache.cache_lock);

Why is it OK to drop the lock here? What is protecting the ent pointer
against free?

> +	if (ent && (ent->limit || force)) {

What locking protects ent->limit ?

> +		xa_lock_irq(&ent->mkeys);
> +		if (!ent->stored) {
> +			if (ent->limit) {
> +				queue_adjust_cache_locked(ent);
> +				ent->miss++;
> +			}
> +			xa_unlock_irq(&ent->mkeys);
> +
> +			err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +			if (err)
> +				goto err;

So if there is no entry we create a bigger one rounded up to pow2?

> +
> +			WRITE_ONCE(ent->dev->cache.last_add, jiffies);
> +			xa_lock_irq(&ent->mkeys);
> +			ent->total_mrs++;
> +			xa_unlock_irq(&ent->mkeys);

May as well optimize for the normal case, do the total_mrs++ before
create_mkey while we still have the lock and undo it if it fails. It
is just minor stat reporting right?

> +		} else {
> +			xa_ent = __xa_store(&ent->mkeys, --ent->stored,
> +					  NULL, GFP_KERNEL);
> +			WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
> +			WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) !=
> +				NULL);

This whole bigger block want to be in its own function 'mlx5_ent_get_mkey()'

Then you can write it simpler without all the duplicated error
handling and deep indenting

>  			queue_adjust_cache_locked(ent);
> -			ent->miss++;
> -		}
> -		xa_unlock_irq(&ent->mkeys);
> -		err = create_cache_mkey(ent, &mr->mmkey.key);
> -		if (err) {
> -			kfree(mr);
> -			return ERR_PTR(err);
> +			xa_unlock_irq(&ent->mkeys);
> +			mr->mmkey.key = (u32)xa_to_value(xa_ent);
>  		}
> +		mr->cache_ent = ent;
>  	} else {
> -		mr = __xa_store(&ent->mkeys, --ent->stored, NULL,
> -				GFP_KERNEL);
> -		WARN_ON(mr == NULL || xa_is_err(mr));
> -		WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
> -		queue_adjust_cache_locked(ent);
> -		xa_unlock_irq(&ent->mkeys);
> -
> -		mr->mmkey.key = (u32)xa_to_value(xa_ent);
> +		/*
> +		 * Can not use a cache mkey.
> +		 * Create an mkey with the exact needed size.
> +		 */
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, ndescs));
> +		err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +		if (err)
> +			goto err;
>  	}

I think this needs to be broken to functions, it is hard to read with
all these ifs and inlined locking

>  
> +static int ent_insert(struct mlx5_mr_cache *cache, struct mlx5_cache_ent *ent)
> +{
> +	struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
> +	int size = MLX5_ST_SZ_BYTES(mkc);
> +	struct mlx5_cache_ent *cur;
> +	int cmp;
> +
> +	/* Figure out where to put new node */
> +	while (*new) {
> +		cur = container_of(*new, struct mlx5_cache_ent, node);
> +		parent = *new;
> +		cmp = memcmp(ent->mkc, cur->mkc, size);
> +		if (cmp < 0)
> +			new = &((*new)->rb_left);
> +		else if (cmp > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			return -EEXIST;
> +	}
> +
> +	/* Add new node and rebalance tree. */
> +	rb_link_node(&ent->node, parent, new);
> +	rb_insert_color(&ent->node, &cache->cache_root);
> +
> +	return 0;
> +}

This should be near the find

> +
> +static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
> +						       int order)
> +{
> +	struct mlx5_cache_ent *ent;
> +	int ret;
> +
> +	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +	if (!ent)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ent->mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL);
> +	if (!ent->mkc) {
> +		kfree(ent);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	ent->ndescs = 1 << order;
> +	mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT,
> +			   PAGE_SHIFT);
> +	MLX5_SET(mkc, ent->mkc, translations_octword_size,
> +		 get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs));
> +	mutex_lock(&dev->cache.cache_lock);
> +	ret = ent_insert(&dev->cache, ent);
> +	mutex_unlock(&dev->cache.cache_lock);
> +	if (ret) {
> +		kfree(ent->mkc);
> +		kfree(ent);
> +		return ERR_PTR(ret);
> +	}
> +
> +	xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
> +	ent->dev = dev;
> +
> +	INIT_WORK(&ent->work, cache_work_func);
> +	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

And the ent should be fully setup before adding it to the cache

Jason