Message-ID: <20251218091050.55047-10-15927021679@163.com>
Date: Thu, 18 Dec 2025 17:09:49 +0800
From: Xiong Weimin <15927021679@....com>
To: "Michael S . Tsirkin" <mst@...hat.com>,
	David Hildenbrand <david@...hat.com>,
	Jason Wang <jasowang@...hat.com>,
	Stefano Garzarella <sgarzare@...hat.com>,
	Thomas Monjalon <thomas@...jalon.net>,
	David Marchand <david.marchand@...hat.com>,
	Luca Boccassi <bluca@...ian.org>,
	Kevin Traynor <ktraynor@...hat.com>,
	Christian Ehrhardt <christian.ehrhardt@...onical.com>,
	Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
	Eugenio Pérez <eperezma@...hat.com>,
	Xueming Li <xuemingl@...dia.com>,
	Maxime Coquelin <maxime.coquelin@...hat.com>,
	Chenbo Xia <chenbox@...dia.com>,
	Bruce Richardson <bruce.richardson@...el.com>
Cc: kvm@...r.kernel.org,
	virtualization@...ts.linux.dev,
	netdev@...r.kernel.org,
	xiongweimin <xiongweimin@...inos.cn>
Subject: [PATCH 09/10] drivers/infiniband/hw/virtio: Implement P_Key, QP query and user MR resource management verbs

From: xiongweimin <xiongweimin@...inos.cn>

Add support for three RDMA resource management verbs (a short
userspace usage sketch follows the summary below):

1. P_Key Table Query:
   - Implements the query_pkey verb (VIRTIO_RDMA_CMD_QUERY_PKEY) for
     partition key retrieval
   - Handles endianness conversion for cross-platform compatibility
   - Logs and propagates errors on device communication failures

2. QP Attribute Query:
   - Full QP state retrieval including capabilities and AH attributes
   - Byte order handling for all struct fields
   - Init attributes rebuilt from the driver's cached QP state
   - Detailed error logging for debugging

3. User Memory Registration:
   - Memory pinning via ib_umem_get() with access flag enforcement
   - DMA-safe page table construction and bulk transfer to device
   - Multi-architecture DMA address handling
   - Strict memory boundary validation
   - Resource cleanup guarantees on all error paths

Key enhancements:
- Unified virtqueue command infrastructure
- Cross-architecture endianness handling
- Single-command page table transfer for registered memory regions
- Protection domain integration for memory access control
- Layered error unwind paths for reliable resource recovery
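
The three verbs are reached from userspace through the standard
libibverbs entry points. Below is a minimal illustrative sketch, not
part of this patch; it assumes the caller has already opened the
device context and created the PD and QP:

  #include <stdlib.h>
  #include <stdio.h>
  #include <arpa/inet.h>
  #include <infiniband/verbs.h>

  int exercise_verbs(struct ibv_context *ctx, struct ibv_pd *pd,
                     struct ibv_qp *qp)
  {
          __be16 pkey;
          struct ibv_qp_attr attr;
          struct ibv_qp_init_attr init_attr;
          struct ibv_mr *mr;
          void *buf;

          /* P_Key table query -> vrdma_query_pkey() */
          if (ibv_query_pkey(ctx, 1, 0, &pkey))
                  return -1;

          /* QP attribute query -> vrdma_query_qp() */
          if (ibv_query_qp(qp, &attr, IBV_QP_STATE | IBV_QP_CAP,
                           &init_attr))
                  return -1;

          /* User MR registration -> vrdma_reg_user_mr() */
          buf = aligned_alloc(4096, 4096);
          if (!buf)
                  return -1;
          mr = ibv_reg_mr(pd, buf, 4096,
                          IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
          if (!mr) {
                  free(buf);
                  return -1;
          }

          printf("pkey=0x%04x qp_state=%d lkey=0x%x rkey=0x%x\n",
                 ntohs(pkey), attr.qp_state, mr->lkey, mr->rkey);

          ibv_dereg_mr(mr);
          free(buf);
          return 0;
  }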

Signed-off-by: Xiong Weimin <xiongweimin@...inos.cn>
---
 .../infiniband/hw/virtio/vrdma_dev_api.h      |  39 ++
 .../drivers/infiniband/hw/virtio/vrdma_ib.c   | 335 +++++++++++++++++-
 2 files changed, 373 insertions(+), 1 deletion(-)

diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
index d0ce02601..86b5ecade 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
@@ -225,6 +225,45 @@ struct vrdma_rsp_modify_qp {
     __u32 qpn;
 };
 
+struct vrdma_cmd_query_pkey {
+	__u32 port;
+	__u16 index;
+};
+
+struct vrdma_rsp_query_pkey {
+	__u16 pkey;
+};
+
+struct vrdma_cmd_query_qp {
+	__u32 qpn;
+	__u32 attr_mask;
+};
+
+struct vrdma_rsp_query_qp {
+	struct vrdma_qp_attr attr;
+};
+
+struct vrdma_cmd_reg_user_mr {
+	__u32 pdn;
+	__u32 access_flags;
+	__u64 start;
+	__u64 length;
+	__u64 virt_addr;
+
+	__u64 pages;
+	__u32 npages;
+};
+
+struct vrdma_rsp_reg_user_mr {
+	__u32 mrn;
+	__u32 lkey;
+	__u32 rkey;
+};
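+
+/* All multi-byte fields in the command/response structs above travel
+ * little-endian on the wire, matching the cpu_to_le*() and le*_to_cpu()
+ * conversions applied by the driver in vrdma_ib.c. */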
+
 #define VRDMA_CTRL_OK	0
 #define VRDMA_CTRL_ERR	1
 
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
index f9b129774..b1429e072 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
@@ -23,6 +23,9 @@
 #include "vrdma_queue.h"
 
 #define VRTIO_RDMA_PAGE_PER_TBL 512
+#define VRDMA_MAX_PAGES         (512 * 512)
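+/* 512 entries per table x 512 tables: a single MR is capped at 262144
+ * pages, i.e. 1 GiB with 4 KiB pages. */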
 
 /**
  * cmd_str - String representation of virtio RDMA control commands
@@ -86,6 +89,36 @@ static void rdma_ah_attr_to_vrdma(struct vrdma_ah_attr *dst,
 	memcpy(&dst->roce, &src->roce, sizeof(struct roce_ah_attr));
 }
 
+static void vrdma_to_ib_global_route(struct ib_global_route *dst,
+			       const struct vrdma_global_route *src)
+{
+	dst->dgid = src->dgid;
+	dst->flow_label = src->flow_label;
+	dst->sgid_index = src->sgid_index;
+	dst->hop_limit = src->hop_limit;
+	dst->traffic_class = src->traffic_class;
+}
+
+static void vrdma_to_ib_qp_cap(struct ib_qp_cap *dst, const struct vrdma_qp_cap *src)
+{
+	dst->max_send_wr = src->max_send_wr;
+	dst->max_recv_wr = src->max_recv_wr;
+	dst->max_send_sge = src->max_send_sge;
+	dst->max_recv_sge = src->max_recv_sge;
+	dst->max_inline_data = src->max_inline_data;
+}
+
+static void vrdma_to_rdma_ah_attr(struct rdma_ah_attr *dst,
+			    const struct vrdma_ah_attr *src)
+{
+	vrdma_to_ib_global_route(rdma_ah_retrieve_grh(dst), &src->grh);
+	rdma_ah_set_sl(dst, src->sl);
+	rdma_ah_set_static_rate(dst, src->static_rate);
+	rdma_ah_set_port_num(dst, src->port_num);
+	rdma_ah_set_ah_flags(dst, src->ah_flags);
+	memcpy(&dst->roce, &src->roce, sizeof(struct roce_ah_attr));
+}
+
 /**
  * vrdma_exec_verbs_cmd - Execute a verbs command via control virtqueue
  * @vrdev: VRDMA device
@@ -2521,6 +2554,303 @@ static int vrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	return rc;
 }
 
+/**
+ * vrdma_query_pkey - Query Partition Key (P_Key) at given index
+ * @ibdev:	Verbs device (vRDMA virtual device)
+ * @port:	Port number (1-indexed)
+ * @index:	P_Key table index
+ * @pkey:	Output buffer to store the P_Key value
+ *
+ * Queries the P_Key from the backend via virtqueue command.
+ * Only meaningful for IB-style ports (not RoCE).
+ *
+ * Context: Process context (may sleep). Can be called from user IOCTL path.
+ * Return:
+ * * 0 on success
+ * * -ENOMEM if command allocation fails
+ * * -EIO or other negative errno on communication failure
+ */
+static int vrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+	struct vrdma_dev *vdev = to_vdev(ibdev);
+	struct vrdma_cmd_query_pkey *cmd;
+	struct vrdma_rsp_query_pkey *rsp;
+	struct scatterlist in, out;
+	int rc;
+
+	/* Allocate command and response buffers */
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+	if (!rsp) {
+		kfree(cmd);
+		return -ENOMEM;
+	}
+
+	/* Fill input parameters */
+	cmd->port = cpu_to_le32(port);
+	cmd->index = cpu_to_le16(index);
+
+	/* Prepare scatterlists for virtqueue I/O */
+	sg_init_one(&in, cmd, sizeof(*cmd));
+	sg_init_one(&out, rsp, sizeof(*rsp));
+
+	/* Execute command */
+	rc = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_QUERY_PKEY, &in, &out);
+	if (rc) {
+		dev_err(&vdev->vdev->dev,
+			"VIRTIO_RDMA_CMD_QUERY_PKEY failed: port=%u idx=%u err=%d\n",
+			port, index, rc);
+		goto out_free;
+	}
+
+	/* Return the P_Key to the caller */
+	*pkey = le16_to_cpu(rsp->pkey);
+
+out_free:
+	kfree(rsp);
+	kfree(cmd);
+	return rc;
+}
+
+/**
+ * vrdma_query_qp - Query QP attributes from the backend
+ * @ibqp:	Queue pair to query
+ * @attr:	Output structure for QP attributes
+ * @attr_mask:	Which fields are requested (ignored by some backends)
+ * @init_attr:	Output structure for init-time attributes
+ *
+ * Queries the QP state and configuration via a control virtqueue command.
+ * This is a synchronous operation.
+ *
+ * Context: Process context (can sleep)
+ * Return:
+ * * 0 on success
+ * * -ENOMEM if allocation fails
+ * * -EIO or other negative errno on communication failure
+ */
+static int vrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			  int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+	struct vrdma_qp *vqp = to_vqp(ibqp);
+	struct vrdma_dev *vdev = to_vdev(ibqp->device);
+	struct vrdma_cmd_query_qp *cmd;
+	struct vrdma_rsp_query_qp *rsp;
+	struct scatterlist in, out;
+	int rc;
+
+	/* Allocate command and response buffers */
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+	if (!rsp) {
+		kfree(cmd);
+		return -ENOMEM;
+	}
+
+	/* Fill input parameters */
+	cmd->qpn = cpu_to_le32(vqp->qp_handle);
+	cmd->attr_mask = cpu_to_le32(attr_mask); /* hint; some backends ignore it */
+
+	sg_init_one(&in, cmd, sizeof(*cmd));
+	sg_init_one(&out, rsp, sizeof(*rsp));
+
+	/* Execute command over control virtqueue */
+	rc = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_QUERY_QP, &in, &out);
+	if (rc) {
+		dev_err(&vdev->vdev->dev,
+			"VIRTIO_RDMA_CMD_QUERY_QP failed: qpn=0x%x err=%d\n",
+			vqp->qp_handle, rc);
+		goto out_free;
+	}
+
+	/* Only copy results on success */
+	attr->qp_state = rsp->attr.qp_state;
+	attr->cur_qp_state = rsp->attr.cur_qp_state;
+	attr->path_mtu = rsp->attr.path_mtu;
+	attr->path_mig_state = rsp->attr.path_mig_state;
+	attr->qkey = le32_to_cpu(rsp->attr.qkey);
+	attr->rq_psn = le32_to_cpu(rsp->attr.rq_psn);
+	attr->sq_psn = le32_to_cpu(rsp->attr.sq_psn);
+	attr->dest_qp_num = le32_to_cpu(rsp->attr.dest_qp_num);
+	attr->qp_access_flags = le32_to_cpu(rsp->attr.qp_access_flags);
+	attr->pkey_index = le16_to_cpu(rsp->attr.pkey_index);
+	attr->alt_pkey_index = le16_to_cpu(rsp->attr.alt_pkey_index);
+	attr->en_sqd_async_notify = rsp->attr.en_sqd_async_notify;
+	attr->sq_draining = rsp->attr.sq_draining;
+	attr->max_rd_atomic = rsp->attr.max_rd_atomic;
+	attr->max_dest_rd_atomic = rsp->attr.max_dest_rd_atomic;
+	attr->min_rnr_timer = rsp->attr.min_rnr_timer;
+	attr->port_num = rsp->attr.port_num;
+	attr->timeout = rsp->attr.timeout;
+	attr->retry_cnt = rsp->attr.retry_cnt;
+	attr->rnr_retry = rsp->attr.rnr_retry;
+	attr->alt_port_num = rsp->attr.alt_port_num;
+	attr->alt_timeout = rsp->attr.alt_timeout;
+	attr->rate_limit = le32_to_cpu(rsp->attr.rate_limit);
+
+	/* Copy capabilities */
+	vrdma_to_ib_qp_cap(&attr->cap, &rsp->attr.cap);
+
+	/* Convert AH attributes (contains GRH + DIP) */
+	vrdma_to_rdma_ah_attr(&attr->ah_attr, &rsp->attr.ah_attr);
+	vrdma_to_rdma_ah_attr(&attr->alt_ah_attr, &rsp->attr.alt_ah_attr);
+
+	/* Fill init attributes (mostly static) */
+	init_attr->event_handler = vqp->ibqp.event_handler;
+	init_attr->qp_context = vqp->ibqp.qp_context;
+	init_attr->send_cq = vqp->ibqp.send_cq;
+	init_attr->recv_cq = vqp->ibqp.recv_cq;
+	init_attr->srq = vqp->ibqp.srq;
+	init_attr->xrcd = NULL; /* Not supported in vRDMA */
+	init_attr->cap = attr->cap;
+	init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; /* default; create-time value not tracked */
+	init_attr->qp_type = vqp->ibqp.qp_type;
+	init_attr->create_flags = 0;
+	init_attr->port_num = vqp->port;
+
+out_free:
+	kfree(rsp);
+	kfree(cmd);
+	return rc;
+}
+
+/**
+ * vrdma_reg_user_mr - Register a user memory region
+ * @pd:		Protection domain
+ * @start:	User virtual address of memory to register
+ * @length:	Length of memory region
+ * @virt_addr:	Requested I/O virtual address for rkey access (often equals @start)
+ * @access_flags: Access permissions (IB_ACCESS_xxx)
+ * @udata:	User data (optional, unused here)
+ *
+ * Locks down user pages, builds page table, and registers MR with backend.
+ * Returns pointer to ib_mr or ERR_PTR on failure.
+ *
+ * Context: Process context (may sleep during ib_umem_get)
+ * Return:
+ * * Pointer to the new ib_mr on success
+ * * ERR_PTR(-errno) on failure
+ */
+static struct ib_mr *vrdma_reg_user_mr(struct ib_pd *pd, u64 start,
+					     u64 length, u64 virt_addr,
+					     int access_flags,
+					     struct ib_udata *udata)
+{
+	struct vrdma_dev *dev = to_vdev(pd->device);
+	struct vrdma_cmd_reg_user_mr *cmd;
+	struct vrdma_rsp_reg_user_mr *rsp;
+	struct vrdma_mr *mr;
+	struct ib_umem *umem;
+	struct sg_dma_page_iter sg_iter;
+	struct scatterlist in, out;
+	int rc = 0;
+	unsigned int npages;
+	dma_addr_t *pages_flat = NULL;
+
+	/* Step 1: Pin user memory pages */
+	umem = ib_umem_get(pd->device, start, length, access_flags);
+	if (IS_ERR(umem)) {
+		dev_err(&dev->vdev->dev, "Failed to pin user memory: va=0x%llx len=%llu\n",
+			start, length);
+		return ERR_CAST(umem);
+	}
+
+	npages = ib_umem_num_pages(umem);
+	if (npages == 0 || npages > VRDMA_MAX_PAGES) {
+		dev_err(&dev->vdev->dev, "Invalid number of pages: %u\n", npages);
+		rc = -EINVAL;
+		goto err_umem;
+	}
+
+	/* Allocate command/response structures (GFP_KERNEL ok in process context) */
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!cmd || !rsp || !mr) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+
+	/* Initialize MR structure */
+	mr->umem = umem;
+	mr->size = length;
+	mr->iova = virt_addr;
+	mr->max_pages = npages;
+
+	/* Allocate contiguous DMA-mapped array for page addresses */
+	pages_flat = dma_alloc_coherent(&dev->vdev->dev,
+					npages * sizeof(dma_addr_t),
+					&mr->dma_pages, GFP_KERNEL);
+	if (!pages_flat) {
+		dev_err(&dev->vdev->dev, "Failed to allocate DMA memory for page table\n");
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+	mr->pages_k = pages_flat; /* flat array, treated as 2D: [i / 512][i % 512] */
+
+	/* Fill page table from ib_umem scatterlist */
+	mr->npages = 0;
+	for_each_sg_dma_page(umem->sgt_append.sgt.sgl, &sg_iter, umem->sgt_append.sgt.nents, 0) {
+		dma_addr_t addr = sg_page_iter_dma_address(&sg_iter);
+		pages_flat[mr->npages++] = addr;
+	}
+
+	/* Sanity check: should match ib_umem_num_pages() */
+	WARN_ON(mr->npages != npages);
+
+	/* Prepare command */
+	cmd->pdn = cpu_to_le32(to_vpd(pd)->pd_handle);
+	cmd->start = cpu_to_le64(start);
+	cmd->length = cpu_to_le64(length);
+	cmd->virt_addr = cpu_to_le64(virt_addr);
+	cmd->access_flags = cpu_to_le32(access_flags);
+	cmd->pages = cpu_to_le64(mr->dma_pages); /* DMA address of page array */
+	cmd->npages = cpu_to_le32(npages);
+
+	sg_init_one(&in, cmd, sizeof(*cmd));
+	sg_init_one(&out, rsp, sizeof(*rsp));
+
+	/* Send command to backend */
+	rc = vrdma_exec_verbs_cmd(dev, VIRTIO_RDMA_CMD_REG_USER_MR, &in, &out);
+	if (rc) {
+		dev_err(&dev->vdev->dev, "Backend failed to register MR: %d\n", rc);
+		goto err_cmd;
+	}
+
+	/* Copy results from response */
+	mr->mr_handle = le32_to_cpu(rsp->mrn);
+	mr->ibmr.lkey = le32_to_cpu(rsp->lkey);
+	mr->ibmr.rkey = le32_to_cpu(rsp->rkey);
+
+	/* Cleanup temporary allocations */
+	kfree(cmd);
+	kfree(rsp);
+
+	/* Link MR to PD if needed, initialize other fields */
+	mr->ibmr.pd = pd;
+	mr->ibmr.device = pd->device;
+	mr->ibmr.type = IB_MR_TYPE_MEM_REG;
+	mr->ibmr.length = length;
+
+	return &mr->ibmr;
+
+err_cmd:
+	dma_free_coherent(&dev->vdev->dev, npages * sizeof(dma_addr_t),
+			  pages_flat, mr->dma_pages);
+err_alloc:
+	kfree(mr);
+	kfree(rsp);
+	kfree(cmd);
+err_umem:
+	ib_umem_release(umem);
+	return ERR_PTR(rc);
+}
+
 static const struct ib_device_ops vrdma_dev_ops = {
 	.owner = THIS_MODULE,
 	.uverbs_abi_ver = VIRTIO_RDMA_ABI_VERSION,
@@ -2554,7 +2884,10 @@ static const struct ib_device_ops vrdma_dev_ops = {
 	.modify_qp = vrdma_modify_qp,
 	.poll_cq = vrdma_poll_cq,
 	.post_recv = vrdma_post_recv,
-	.post_send = vrdma_post_send,			
+	.post_send = vrdma_post_send,
+	.query_pkey = vrdma_query_pkey,
+	.query_qp = vrdma_query_qp,
+	.reg_user_mr = vrdma_reg_user_mr,
 };
 
 /**
-- 
2.43.0
