[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251217084422.4875-7-15927021679@163.com>
Date: Wed, 17 Dec 2025 16:43:31 +0800
From: Xiong Weimin <15927021679@....com>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
"David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Stanislav Fomichev <sdf@...ichev.me>
Cc: linux-kernel@...r.kernel.org,
netdev@...r.kernel.org,
xiongweimin <xiongweimin@...inos.cn>
Subject: [PATCH 05/14] examples/vhost_user_rdma: implement comprehensive memory region management
From: xiongweimin <xiongweimin@...inos.cn>
This commit adds core functionality for RDMA Memory Region (MR) handling:
1. DMA MR registration for physical memory access
2. Pre-allocated MR creation for optimized buffer handling
3. User-space MR registration with GPA->VVA translation
4. MR deregistration with reference-counted cleanup
5. Secure key generation and validation mechanisms
Key features:
- Random lkey/rkey generation with collision avoidance
- Three MR types: DMA, pre-allocated, and user-mapped
- Page mapping for user-space memory regions
- State management (VALID/ZOMBIE) for safe deregistration
- Reference counting integration with PDs
- Comprehensive error handling and logging
Signed-off-by: Xiong Weimin <xiongweimin@...inos.cn>
Change-Id: I4c26d47181f895c05b8ba125fdf0959bd0827d99
---
examples/vhost_user_rdma/vhost_rdma_ib.c | 199 +++++++++++++++++++++++
examples/vhost_user_rdma/vhost_rdma_ib.h | 74 +++++++++
2 files changed, 273 insertions(+)
diff --git a/examples/vhost_user_rdma/vhost_rdma_ib.c b/examples/vhost_user_rdma/vhost_rdma_ib.c
index e590b555d3..3002498151 100644
--- a/examples/vhost_user_rdma/vhost_rdma_ib.c
+++ b/examples/vhost_user_rdma/vhost_rdma_ib.c
@@ -18,6 +18,7 @@
#include <rte_ethdev.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
+#include <rte_random.h>
#include "vhost_rdma.h"
#include "vhost_rdma_ib.h"
@@ -652,6 +653,200 @@ vhost_rdma_destroy_pd(struct vhost_rdma_device *dev, struct iovec *in, CTRL_NO_R
return 0;
}
+/**
+ * Draw a random 8-bit key that differs from @p last_key.
+ *
+ * @param last_key
+ *   Value the returned key must not equal. A value that does not fit in
+ *   8 bits (vhost_rdma_mr_init_key() passes -1) can never match, so in
+ *   that case the first random byte is returned.
+ * @return
+ *   The low 8 bits of an rte_rand() sample, guaranteed != last_key.
+ */
+uint8_t
+vhost_rdma_get_next_key(uint32_t last_key)
+{
+	uint8_t candidate;
+
+	for (;;) {
+		/* Only the low byte of the random sample is kept. */
+		candidate = (uint8_t)rte_rand();
+		if (candidate != last_key)
+			return candidate;
+	}
+}
+
+/**
+ * Fill in the local and remote keys of a memory region.
+ *
+ * The lkey is the MR number shifted left by 8 bits, OR-ed with a random
+ * byte from vhost_rdma_get_next_key(). The rkey is a copy of the lkey when
+ * mr->access contains any VHOST_RDMA_IB_ACCESS_REMOTE bit, otherwise 0.
+ * mr->access therefore has to be set before this function is called.
+ *
+ * @param mr
+ *   Memory region whose lkey/rkey fields are written.
+ * @param mrn
+ *   MR number used as the upper part of the key.
+ */
+void
+vhost_rdma_mr_init_key(struct vhost_rdma_mr *mr, uint32_t mrn)
+{
+	uint32_t key = (mrn << 8) | vhost_rdma_get_next_key(-1);
+
+	mr->lkey = key;
+	if (mr->access & VHOST_RDMA_IB_ACCESS_REMOTE)
+		mr->rkey = key;
+	else
+		mr->rkey = 0;
+}
+
+/**
+ * Control handler: create a DMA-type memory region on a protection domain.
+ *
+ * Looks up the PD named in the command, takes an MR from the device's MR
+ * pool, marks it VALID with the requested access flags, generates its keys
+ * and writes mrn/lkey/rkey into the response.
+ *
+ * @param dev
+ *   Device that owns the PD and MR pools.
+ * @param in
+ *   Command buffer, read as struct vhost_rdma_cmd_get_dma_mr.
+ * @param out
+ *   Response buffer, written as struct vhost_rdma_ack_get_dma_mr.
+ * @return
+ *   0 on success, -EINVAL if the PD does not exist, -ENOMEM if no MR could
+ *   be allocated. CHK_IOVEC may return early on its own (macro defined
+ *   elsewhere).
+ */
+static int
+vhost_rdma_get_dma_mr(struct vhost_rdma_device *dev, struct iovec *in,
+		struct iovec *out)
+{
+	struct vhost_rdma_cmd_get_dma_mr *get_cmd;
+	struct vhost_rdma_ack_get_dma_mr *ack_rsp;
+	struct vhost_rdma_mr *mr;
+	struct vhost_rdma_pd *pd;
+	uint32_t mrn;
+
+	CHK_IOVEC(get_cmd, in);
+	CHK_IOVEC(ack_rsp, out);
+
+	pd = vhost_rdma_pool_get(&dev->pd_pool, get_cmd->pdn);
+	if (unlikely(!pd)) {
+		RDMA_LOG_ERR("pd is not found");
+		return -EINVAL;
+	}
+
+	mr = vhost_rdma_pool_alloc(&dev->mr_pool, &mrn);
+	if (!mr) {
+		RDMA_LOG_ERR("mr alloc failed");
+		return -ENOMEM;
+	}
+
+	/* The new MR takes a reference on the PD it belongs to. */
+	vhost_rdma_ref_init(mr);
+	vhost_rdma_add_ref(pd);
+
+	mr->pd = pd;
+	mr->mrn = mrn;
+	mr->type = VHOST_MR_TYPE_DMA;
+	mr->state = VHOST_MR_STATE_VALID;
+	mr->access = get_cmd->access_flags;
+	/* Key generation reads mr->access, so it runs after access is set. */
+	vhost_rdma_mr_init_key(mr, mrn);
+
+	ack_rsp->mrn = mrn;
+	ack_rsp->lkey = mr->lkey;
+	ack_rsp->rkey = mr->rkey;
+
+	return 0;
+}
+
+/**
+ * Control handler: allocate a memory region with room for a number of pages.
+ *
+ * Looks up the PD named in the command, takes an MR from the device's MR
+ * pool, records the access flags and the maximum page count, generates the
+ * keys and writes mrn/lkey/rkey into the response.
+ *
+ * @param dev
+ *   Device that owns the PD and MR pools.
+ * @param in
+ *   Command buffer, read as struct vhost_rdma_cmd_alloc_mr.
+ * @param out
+ *   Response buffer, written as struct vhost_rdma_ack_get_dma_mr (the same
+ *   layout the DMA MR command uses).
+ * @return
+ *   0 on success, -EINVAL if the PD does not exist, -ENOMEM if no MR could
+ *   be allocated. CHK_IOVEC may return early on its own (macro defined
+ *   elsewhere).
+ */
+static int
+vhost_rdma_alloc_mr(struct vhost_rdma_device *dev, struct iovec *in,
+		struct iovec *out)
+{
+	struct vhost_rdma_cmd_alloc_mr *alloc_cmd;
+	struct vhost_rdma_ack_get_dma_mr *ack_rsp;
+	struct vhost_rdma_mr *mr;
+	struct vhost_rdma_pd *pd;
+	uint32_t mrn;
+
+	CHK_IOVEC(alloc_cmd, in);
+	CHK_IOVEC(ack_rsp, out);
+
+	pd = vhost_rdma_pool_get(&dev->pd_pool, alloc_cmd->pdn);
+	if (unlikely(!pd)) {
+		RDMA_LOG_ERR("pd is not found");
+		return -EINVAL;
+	}
+
+	mr = vhost_rdma_pool_alloc(&dev->mr_pool, &mrn);
+	if (!mr) {
+		RDMA_LOG_ERR("mr alloc failed");
+		return -ENOMEM;
+	}
+
+	/* The new MR takes a reference on the PD it belongs to. */
+	vhost_rdma_ref_init(mr);
+	vhost_rdma_add_ref(pd);
+
+	mr->pd = pd;
+	mr->mrn = mrn;
+	/*
+	 * NOTE(review): this is the same type tag the DMA MR handler uses;
+	 * confirm that a pre-allocated MR is really meant to be
+	 * VHOST_MR_TYPE_DMA rather than VHOST_MR_TYPE_MR.
+	 */
+	mr->type = VHOST_MR_TYPE_DMA;
+	mr->state = VHOST_MR_STATE_VALID;
+	mr->max_pages = alloc_cmd->max_num_sg;
+	mr->access = alloc_cmd->access_flags;
+	/* Key generation reads mr->access, so it runs after access is set. */
+	vhost_rdma_mr_init_key(mr, mrn);
+
+	ack_rsp->mrn = mrn;
+	ack_rsp->lkey = mr->lkey;
+	ack_rsp->rkey = mr->rkey;
+
+	return 0;
+}
+
+/**
+ * Translate an array of guest page addresses into host virtual addresses.
+ *
+ * For every index below @p npages, dma_pages[i] is passed through
+ * gpa_to_vva() and the result is stored in pages[i]. Each page is expected
+ * to map for a full USER_MMAP_TARGET_PAGE_SIZE bytes; this is enforced with
+ * assert() only, so it is not checked when NDEBUG is defined.
+ *
+ * @param mem
+ *   Guest memory table handed to gpa_to_vva().
+ * @param pages
+ *   Output array with at least @p npages entries.
+ * @param dma_pages
+ *   Input array of guest addresses with at least @p npages entries.
+ * @param npages
+ *   Number of entries to translate.
+ */
+void
+vhost_rdma_map_pages(struct rte_vhost_memory *mem, uint64_t *pages,
+		uint64_t *dma_pages, uint32_t npages)
+{
+	uint32_t i;
+
+	for (i = 0; i < npages; i++) {
+		/*
+		 * The length is handed to gpa_to_vva() by pointer and checked
+		 * afterwards, so it may be rewritten by the call. Start every
+		 * page from a full page size so that a shortened length from
+		 * one translation cannot carry over into the next request when
+		 * the assert below is compiled out.
+		 */
+		uint64_t len = USER_MMAP_TARGET_PAGE_SIZE;
+
+		pages[i] = gpa_to_vva(mem, dma_pages[i], &len);
+		assert(len == USER_MMAP_TARGET_PAGE_SIZE);
+	}
+}
+
+/**
+ * Control handler: register a user memory region described by a page list.
+ *
+ * The command carries @c npages guest page addresses after its fixed header.
+ * They are translated with vhost_rdma_map_pages() into a heap array owned by
+ * the new MR (mr->pages). The MR is then marked VALID, its keys are
+ * generated and mrn/lkey/rkey are written into the response.
+ *
+ * @param dev
+ *   Device that owns the PD and MR pools and the guest memory table.
+ * @param in
+ *   Command buffer, read as struct vhost_rdma_cmd_reg_user_mr followed by
+ *   its page array.
+ * @param out
+ *   Response buffer, written as struct vhost_rdma_ack_reg_user_mr.
+ * @return
+ *   0 on success, -EINVAL if the page array does not fit in the command
+ *   buffer or the PD does not exist, -ENOMEM if the page array or the MR
+ *   could not be allocated. CHK_IOVEC may return early on its own (macro
+ *   defined elsewhere).
+ */
+static int
+vhost_rdma_reg_user_mr(struct vhost_rdma_device *dev, struct iovec *in,
+		struct iovec *out)
+{
+	struct vhost_rdma_cmd_reg_user_mr *reg_cmd;
+	struct vhost_rdma_ack_reg_user_mr *ack_rsp;
+	struct vhost_rdma_mr *mr;
+	struct vhost_rdma_pd *pd;
+	uint64_t *pages;
+	uint32_t npages;
+	uint32_t mrn;
+
+	CHK_IOVEC(reg_cmd, in);
+	CHK_IOVEC(ack_rsp, out);
+
+	/* Use one snapshot of the count for the check, allocation and copy. */
+	npages = reg_cmd->npages;
+
+	/*
+	 * npages comes from the guest: make sure the trailing page array is
+	 * entirely inside the command buffer before reading from it.
+	 */
+	if (in->iov_len < sizeof(*reg_cmd) ||
+	    npages > (in->iov_len - sizeof(*reg_cmd)) / sizeof(reg_cmd->pages[0])) {
+		RDMA_LOG_ERR("reg mr: %u pages do not fit in command buffer", npages);
+		return -EINVAL;
+	}
+
+	pd = vhost_rdma_pool_get(&dev->pd_pool, reg_cmd->pdn);
+	if (unlikely(pd == NULL)) {
+		RDMA_LOG_ERR("pd is not found");
+		return -EINVAL;
+	}
+
+	/*
+	 * Allocate the page array before taking an MR from the pool, so a
+	 * failure of either step leaves nothing behind: free() undoes the
+	 * array, and no pool entry is consumed if the array cannot be had.
+	 */
+	pages = malloc(sizeof(*pages) * npages);
+	if (pages == NULL) {
+		RDMA_LOG_ERR("mr page array alloc failed");
+		return -ENOMEM;
+	}
+
+	mr = vhost_rdma_pool_alloc(&dev->mr_pool, &mrn);
+	if (mr == NULL) {
+		RDMA_LOG_ERR("mr alloc failed");
+		free(pages);
+		return -ENOMEM;
+	}
+
+	/* The new MR takes a reference on the PD it belongs to. */
+	vhost_rdma_ref_init(mr);
+	vhost_rdma_add_ref(pd);
+
+	mr->pages = pages;
+	vhost_rdma_map_pages(dev->mem, mr->pages, reg_cmd->pages, npages);
+
+	mr->pd = pd;
+	mr->access = reg_cmd->access_flags;
+	mr->length = reg_cmd->length;
+	/* Only the page-aligned part of the start address is stored. */
+	mr->iova = reg_cmd->virt_addr & USER_MMAP_PAGE_MASK;
+	mr->npages = npages;
+	mr->type = VHOST_MR_TYPE_MR;
+	mr->state = VHOST_MR_STATE_VALID;
+	/* Key generation reads mr->access, so it runs after access is set. */
+	vhost_rdma_mr_init_key(mr, mrn);
+	mr->mrn = mrn;
+
+	ack_rsp->lkey = mr->lkey;
+	ack_rsp->rkey = mr->rkey;
+	ack_rsp->mrn = mrn;
+
+	return 0;
+}
+
+/**
+ * Control handler: deregister a memory region.
+ *
+ * Marks the MR as ZOMBIE, then drops the reference the MR holds on its PD
+ * and the MR's own reference.
+ *
+ * @param dev
+ *   Device that owns the MR pool.
+ * @param in
+ *   Command buffer, read as struct vhost_rdma_cmd_dereg_mr.
+ * @return
+ *   0 on success, -EINVAL if no MR with the given number exists. CHK_IOVEC
+ *   may return early on its own (macro defined elsewhere).
+ */
+static int
+vhost_rdma_dereg_mr(struct vhost_rdma_device *dev, struct iovec *in, CTRL_NO_RSP)
+{
+	struct vhost_rdma_cmd_dereg_mr *dereg_cmd;
+	struct vhost_rdma_mr *mr;
+
+	CHK_IOVEC(dereg_cmd, in);
+
+	mr = vhost_rdma_pool_get(&dev->mr_pool, dereg_cmd->mrn);
+	if (unlikely(mr == NULL)) {
+		RDMA_LOG_ERR("mr not found");
+		/* Bail out: everything below dereferences mr. */
+		return -EINVAL;
+	}
+
+	mr->state = VHOST_MR_STATE_ZOMBIE;
+
+	/* Release the PD first; mr->pd must be read while mr is still held. */
+	vhost_rdma_drop_ref(mr->pd, dev, pd);
+	vhost_rdma_drop_ref(mr, dev, mr);
+
+	RDMA_LOG_DEBUG("destroy mr %u", dereg_cmd->mrn);
+
+	return 0;
+}
+
/* Command handler table declaration */
struct {
int (*handler)(struct vhost_rdma_device *dev, struct iovec *in, struct iovec *out);
@@ -663,6 +858,10 @@ struct {
DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_DESTROY_CQ, vhost_rdma_destroy_cq),
DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_CREATE_PD, vhost_rdma_create_pd),
DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_DESTROY_PD, vhost_rdma_destroy_pd),
+ DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_GET_DMA_MR, vhost_rdma_get_dma_mr),
+ DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_ALLOC_MR, vhost_rdma_alloc_mr),
+ DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_REG_USER_MR, vhost_rdma_reg_user_mr),
+ DEFINE_VIRTIO_RDMA_CMD(VHOST_RDMA_CTRL_ROCE_DEREG_MR, vhost_rdma_dereg_mr),
};
/**
diff --git a/examples/vhost_user_rdma/vhost_rdma_ib.h b/examples/vhost_user_rdma/vhost_rdma_ib.h
index 6356abc65a..ddfdcf4917 100644
--- a/examples/vhost_user_rdma/vhost_rdma_ib.h
+++ b/examples/vhost_user_rdma/vhost_rdma_ib.h
@@ -58,6 +58,9 @@ struct vhost_queue;
/** Maximum size for config space read/write operations */
#define VHOST_USER_MAX_CONFIG_SIZE 256
+/** Size in bytes that each translated user MR page is expected to span */
+#define USER_MMAP_TARGET_PAGE_SIZE 4096
+/** Mask that clears the offset-within-page bits of an address */
+#define USER_MMAP_PAGE_MASK (~(USER_MMAP_TARGET_PAGE_SIZE-1))
+
/** ROCE control command types (virtio-rdma extension) */
#define VHOST_RDMA_CTRL_ROCE 6
#define VHOST_RDMA_CTRL_ROCE_QUERY_DEVICE 0
@@ -249,6 +252,14 @@ enum ib_port_speed {
VHOST_RDMA_IB_SPEED_NDR = 128,
};
+/**
+ * @brief Access permission bits stored in a memory region's access field
+ */
+enum vhost_ib_access_flags {
+ VHOST_RDMA_IB_ACCESS_LOCAL_WRITE = (1 << 0),
+ VHOST_RDMA_IB_ACCESS_REMOTE_WRITE = (1 << 1),
+ VHOST_RDMA_IB_ACCESS_REMOTE_READ = (1 << 2),
+};
+
+/** Any remote access bit; an MR gets a non-zero rkey only if one of these is set */
+#define VHOST_RDMA_IB_ACCESS_REMOTE (VHOST_RDMA_IB_ACCESS_REMOTE_WRITE | VHOST_RDMA_IB_ACCESS_REMOTE_READ)
+
/**
* @brief QP capabilities structure
*/
@@ -707,6 +718,60 @@ struct vhost_rdma_cmd_destroy_pd {
uint32_t pdn;
};
+/* Command payload of VHOST_RDMA_CTRL_ROCE_ALLOC_MR */
+struct vhost_rdma_cmd_alloc_mr {
+ /* The handle of PD which the MR associated with */
+ uint32_t pdn;
+ /* MR's protection attributes, enum vhost_ib_access_flags */
+ uint32_t access_flags;
+ /* Maximum number of pages for the MR (stored as the MR's max_pages) */
+ uint32_t max_num_sg;
+};
+/* Command payload of VHOST_RDMA_CTRL_ROCE_GET_DMA_MR */
+struct vhost_rdma_cmd_get_dma_mr {
+ /* The handle of PD which the MR associated with */
+ uint32_t pdn;
+ /* MR's protection attributes, enum vhost_ib_access_flags */
+ uint32_t access_flags;
+};
+
+/* Response payload of the GET_DMA_MR and ALLOC_MR commands */
+struct vhost_rdma_ack_get_dma_mr {
+ /* The handle of MR */
+ uint32_t mrn;
+ /* MR's local access key */
+ uint32_t lkey;
+ /* MR's remote access key (0 when no remote access was requested) */
+ uint32_t rkey;
+};
+
+/* Command payload of VHOST_RDMA_CTRL_ROCE_REG_USER_MR */
+struct vhost_rdma_cmd_reg_user_mr {
+ /* The handle of PD which the MR associated with */
+ uint32_t pdn;
+ /* MR's protection attributes, enum vhost_ib_access_flags */
+ uint32_t access_flags;
+ /* Starting virtual address of MR */
+ uint64_t virt_addr;
+ /* Length of MR */
+ uint64_t length;
+ /* Number of entries in the page array below */
+ uint32_t npages;
+ /* Padding */
+ uint32_t padding;
+ /* Array to store physical address of each page in MR */
+ uint64_t pages[];
+};
+
+/* Response payload of VHOST_RDMA_CTRL_ROCE_REG_USER_MR */
+struct vhost_rdma_ack_reg_user_mr {
+ /* The handle of MR */
+ uint32_t mrn;
+ /* MR's local access key */
+ uint32_t lkey;
+ /* MR's remote access key (0 when no remote access was requested) */
+ uint32_t rkey;
+};
+
+/* Command payload of VHOST_RDMA_CTRL_ROCE_DEREG_MR */
+struct vhost_rdma_cmd_dereg_mr {
+ /* The handle of MR */
+ uint32_t mrn;
+};
+
/**
* @brief Convert IB MTU enum to byte size
* @param mtu The MTU enum value
@@ -792,4 +857,13 @@ int setup_iovs_from_descs(struct rte_vhost_memory *mem,
uint16_t *num_in,
uint16_t *num_out);
+/**
+ * @brief Set mr->lkey and mr->rkey from the MR number and mr->access
+ * @param mr Memory region to update; its access field must already be set
+ * @param mrn MR number placed above the low 8 random bits of the key
+ */
+void vhost_rdma_mr_init_key(struct vhost_rdma_mr *mr, uint32_t mrn);
+
+/**
+ * @brief Generate a random 8-bit key different from last_key
+ * @param last_key Value the result must not equal
+ * @return Random key byte
+ */
+uint8_t vhost_rdma_get_next_key(uint32_t last_key);
+
+/**
+ * @brief Translate guest page addresses to host virtual addresses
+ * @param mem Guest memory table used for the translation
+ * @param pages Output array receiving npages translated addresses
+ * @param dma_pages Input array of npages guest page addresses
+ * @param npages Number of entries to translate
+ */
+void vhost_rdma_map_pages(struct rte_vhost_memory *mem,
+ uint64_t *pages,
+ uint64_t *dma_pages,
+ uint32_t npages);
+
#endif /* __VHOST_RDMA_IB_H__ */
\ No newline at end of file
--
2.43.0
Powered by blists - more mailing lists