Message-ID: <20251116-frmr_pools-v1-2-5eb3c8f5c9c4@nvidia.com>
Date: Sun, 16 Nov 2025 21:10:23 +0200
From: Edward Srouji <edwards@...dia.com>
To: Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>, "Saeed
Mahameed" <saeedm@...dia.com>, Tariq Toukan <tariqt@...dia.com>, Mark Bloch
<mbloch@...dia.com>, Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
<kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>
CC: <linux-kernel@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
<netdev@...r.kernel.org>, Michael Guralnik <michaelgur@...dia.com>, "Edward
Srouji" <edwards@...dia.com>, Yishai Hadas <yishaih@...dia.com>
Subject: [PATCH rdma-next 2/9] RDMA/core: Add aging to FRMR pools
From: Michael Guralnik <michaelgur@...dia.com>
Add an aging mechanism to the handles of FRMR pools.

Keep handles stored in FRMR pools for at least one minute so that
applications can reuse them, and destroy any handles that were not
reused. To accomplish that, add a new queue to each pool.

When the aging work triggers, destroy all FRMR handles in the new
'inactive' queue and move all handles from the 'active' queue to the
'inactive' queue. This guarantees that a destroyed handle was not
reused for at least one aging period and was not held for longer than
two aging periods.

Handles are popped from the inactive queue only when the active queue
is empty.
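
To make the scheme concrete, below is a minimal userspace sketch of the
two-generation aging described above. It is illustrative only and not
part of this patch; gen_pool, pool_tick, pool_pop and q_push are
made-up names for the example, not the kernel API.

/* Two-generation aging sketch (userspace model, not kernel code). */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node { unsigned int handle; struct node *next; };
struct queue { struct node *head; unsigned int count; };

struct gen_pool {
        struct queue active;    /* handles pushed since the last tick */
        struct queue inactive;  /* handles that survived one full period */
};

static void q_push(struct queue *q, unsigned int handle)
{
        struct node *n = malloc(sizeof(*n));

        if (!n)
                abort();
        n->handle = handle;
        n->next = q->head;
        q->head = n;
        q->count++;
}

static bool q_pop(struct queue *q, unsigned int *handle)
{
        struct node *n = q->head;

        if (!n)
                return false;
        *handle = n->handle;
        q->head = n->next;
        q->count--;
        free(n);
        return true;
}

/* Reuse prefers the fresh generation and falls back to the old one. */
static bool pool_pop(struct gen_pool *p, unsigned int *handle)
{
        return q_pop(&p->active, handle) || q_pop(&p->inactive, handle);
}

/*
 * One aging period elapsed: everything still in 'inactive' was not
 * reused for a full period, so destroy it; 'active' handles move to
 * 'inactive' and get exactly one more period to be reused. Returns
 * true if the timer must be re-armed.
 */
static bool pool_tick(struct gen_pool *p)
{
        unsigned int handle;

        while (q_pop(&p->inactive, &handle))
                printf("destroy handle %u\n", handle);

        p->inactive = p->active;        /* "splice" active -> inactive */
        p->active.head = NULL;
        p->active.count = 0;
        return p->inactive.count > 0;
}

int main(void)
{
        struct gen_pool p = { { NULL, 0 }, { NULL, 0 } };
        unsigned int h;

        q_push(&p.active, 1);
        q_push(&p.active, 2);
        pool_tick(&p);          /* 1 and 2 age into 'inactive' */
        pool_pop(&p, &h);       /* reuses 2 before it can be destroyed */
        pool_tick(&p);          /* destroys 1; returns false, no re-arm */
        return 0;
}

Note that a handle popped and later pushed again lands back in 'active',
which is what resets its age.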
Signed-off-by: Michael Guralnik <michaelgur@...dia.com>
Reviewed-by: Yishai Hadas <yishaih@...dia.com>
Signed-off-by: Edward Srouji <edwards@...dia.com>
---
drivers/infiniband/core/frmr_pools.c | 84 ++++++++++++++++++++++++++++++++----
drivers/infiniband/core/frmr_pools.h | 7 +++
2 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c
index 073b2fcfb2cc7d466fedfba14ad04f1e2d7edf65..406664a6e2099b2a7827e12a40820ecab75cb59c 100644
--- a/drivers/infiniband/core/frmr_pools.c
+++ b/drivers/infiniband/core/frmr_pools.c
@@ -7,9 +7,12 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <rdma/ib_verbs.h>
+#include <linux/timer.h>
 
 #include "frmr_pools.h"
 
+#define FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS 60
+
 static int push_handle_to_queue_locked(struct frmr_queue *queue, u32 handle)
 {
         u32 tmp = queue->ci % NUM_HANDLES_PER_PAGE;
@@ -79,19 +82,58 @@ static bool pop_frmr_handles_page(struct ib_frmr_pool *pool,
         return true;
 }
 
-static void destroy_frmr_pool(struct ib_device *device,
-                              struct ib_frmr_pool *pool)
+static void destroy_all_handles_in_queue(struct ib_device *device,
+                                         struct ib_frmr_pool *pool,
+                                         struct frmr_queue *queue)
 {
         struct ib_frmr_pools *pools = device->frmr_pools;
         struct frmr_handles_page *page;
         u32 count;
 
-        while (pop_frmr_handles_page(pool, &pool->queue, &page, &count)) {
+        while (pop_frmr_handles_page(pool, queue, &page, &count)) {
                 pools->pool_ops->destroy_frmrs(device, page->handles, count);
                 kfree(page);
         }
+}
+
+static void pool_aging_work(struct work_struct *work)
+{
+        struct ib_frmr_pool *pool = container_of(
+                to_delayed_work(work), struct ib_frmr_pool, aging_work);
+        struct ib_frmr_pools *pools = pool->device->frmr_pools;
+        bool has_work = false;
+
+        destroy_all_handles_in_queue(pool->device, pool, &pool->inactive_queue);
+
+        /* Move all pages from regular queue to inactive queue */
+        spin_lock(&pool->lock);
+        if (pool->queue.ci > 0) {
+                list_splice_tail_init(&pool->queue.pages_list,
+                                      &pool->inactive_queue.pages_list);
+                pool->inactive_queue.num_pages = pool->queue.num_pages;
+                pool->inactive_queue.ci = pool->queue.ci;
+
+                pool->queue.num_pages = 0;
+                pool->queue.ci = 0;
+                has_work = true;
+        }
+        spin_unlock(&pool->lock);
+
+        /* Reschedule if there are handles to age in next aging period */
+        if (has_work)
+                queue_delayed_work(
+                        pools->aging_wq, &pool->aging_work,
+                        secs_to_jiffies(FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS));
+}
+
+static void destroy_frmr_pool(struct ib_device *device,
+                              struct ib_frmr_pool *pool)
+{
+        cancel_delayed_work_sync(&pool->aging_work);
+        destroy_all_handles_in_queue(device, pool, &pool->queue);
+        destroy_all_handles_in_queue(device, pool, &pool->inactive_queue);
 
-        rb_erase(&pool->node, &pools->rb_root);
+        rb_erase(&pool->node, &device->frmr_pools->rb_root);
         kfree(pool);
 }
 
@@ -115,6 +157,11 @@ int ib_frmr_pools_init(struct ib_device *device,
         pools->rb_root = RB_ROOT;
         rwlock_init(&pools->rb_lock);
         pools->pool_ops = pool_ops;
+        pools->aging_wq = create_singlethread_workqueue("frmr_aging_wq");
+        if (!pools->aging_wq) {
+                kfree(pools);
+                return -ENOMEM;
+        }
         device->frmr_pools = pools;
 
         return 0;
@@ -145,6 +192,7 @@ void ib_frmr_pools_cleanup(struct ib_device *device)
                 node = next;
         }
 
+        destroy_workqueue(pools->aging_wq);
         kfree(pools);
         device->frmr_pools = NULL;
 }
@@ -226,7 +274,10 @@ static struct ib_frmr_pool *create_frmr_pool(struct ib_device *device,
 
         memcpy(&pool->key, key, sizeof(*key));
         INIT_LIST_HEAD(&pool->queue.pages_list);
+        INIT_LIST_HEAD(&pool->inactive_queue.pages_list);
         spin_lock_init(&pool->lock);
+        INIT_DELAYED_WORK(&pool->aging_work, pool_aging_work);
+        pool->device = device;
 
         write_lock(&pools->rb_lock);
         while (*new) {
@@ -265,11 +316,17 @@ static int get_frmr_from_pool(struct ib_device *device,
 
         spin_lock(&pool->lock);
         if (pool->queue.ci == 0) {
-                spin_unlock(&pool->lock);
-                err = pools->pool_ops->create_frmrs(device, &pool->key, &handle,
-                                                    1);
-                if (err)
-                        return err;
+                if (pool->inactive_queue.ci > 0) {
+                        handle = pop_handle_from_queue_locked(
+                                &pool->inactive_queue);
+                        spin_unlock(&pool->lock);
+                } else {
+                        spin_unlock(&pool->lock);
+                        err = pools->pool_ops->create_frmrs(device, &pool->key,
+                                                            &handle, 1);
+                        if (err)
+                                return err;
+                }
         } else {
                 handle = pop_handle_from_queue_locked(&pool->queue);
                 spin_unlock(&pool->lock);
@@ -317,12 +374,21 @@ EXPORT_SYMBOL(ib_frmr_pool_pop);
 int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr)
 {
         struct ib_frmr_pool *pool = mr->frmr.pool;
+        struct ib_frmr_pools *pools = device->frmr_pools;
+        bool schedule_aging = false;
         int ret;
 
         spin_lock(&pool->lock);
+        /* Schedule aging every time an empty pool becomes non-empty */
+        if (pool->queue.ci == 0)
+                schedule_aging = true;
         ret = push_handle_to_queue_locked(&pool->queue, mr->frmr.handle);
         spin_unlock(&pool->lock);
 
+        if (ret == 0 && schedule_aging)
+                queue_delayed_work(pools->aging_wq, &pool->aging_work,
+                                   secs_to_jiffies(FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS));
+
         return ret;
 }
 EXPORT_SYMBOL(ib_frmr_pool_push);
diff --git a/drivers/infiniband/core/frmr_pools.h b/drivers/infiniband/core/frmr_pools.h
index 5a4d03b3d86f431c3f2091dd5ab27292547c2030..a20323e03e3f446856dda921811e2359232e0b82 100644
--- a/drivers/infiniband/core/frmr_pools.h
+++ b/drivers/infiniband/core/frmr_pools.h
@@ -11,6 +11,7 @@
 #include <linux/spinlock_types.h>
 #include <linux/types.h>
 #include <asm/page.h>
+#include <linux/workqueue.h>
 
 #define NUM_HANDLES_PER_PAGE \
         ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))
@@ -37,12 +38,18 @@ struct ib_frmr_pool {
         /* Protect access to the queue */
         spinlock_t lock;
         struct frmr_queue queue;
+        struct frmr_queue inactive_queue;
+
+        struct delayed_work aging_work;
+        struct ib_device *device;
 };
 
 struct ib_frmr_pools {
         struct rb_root rb_root;
         rwlock_t rb_lock;
         const struct ib_frmr_pool_ops *pool_ops;
+
+        struct workqueue_struct *aging_wq;
 };
 
 #endif /* RDMA_CORE_FRMR_POOLS_H */
--
2.47.1