lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251218091050.55047-2-15927021679@163.com>
Date: Thu, 18 Dec 2025 17:09:41 +0800
From: Xiong Weimin <15927021679@....com>
To: "Michael S . Tsirkin" <mst@...hat.com>,
	David Hildenbrand <david@...hat.com>,
	Jason Wang <jasowang@...hat.com>,
	Stefano Garzarella <sgarzare@...hat.com>,
	Thomas Monjalon <thomas@...jalon.net>,
	David Marchand <david.marchand@...hat.com>,
	Luca Boccassi <bluca@...ian.org>,
	Kevin Traynor <ktraynor@...hat.com>,
	Christian Ehrhardt <christian.ehrhardt@...onical.com>,
	Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
	Eugenio Pérez <eperezma@...hat.com>,
	Xueming Li <xuemingl@...dia.com>,
	Maxime Coquelin <maxime.coquelin@...hat.com>,
	Chenbo Xia <chenbox@...dia.com>,
	Bruce Richardson <bruce.richardson@...el.com>
Cc: kvm@...r.kernel.org,
	virtualization@...ts.linux.dev,
	netdev@...r.kernel.org,
	xiongweimin <xiongweimin@...inos.cn>
Subject: [PATCH 01/10] drivers/infiniband/hw/virtio: Initial driver for virtio RDMA devices

From: xiongweimin <xiongweimin@...inos.cn>

This commit introduces a new driver for RDMA over virtio, enabling
RDMA capabilities in virtualized environments. The driver consists
of the following main components:

1. Driver registration with the virtio subsystem and device discovery.
2. Device probe and remove handlers for managing the device lifecycle.
3. Initialization of the InfiniBand device attributes by reading the
   virtio configuration space, including conversion from little-endian
   to CPU byte order and capability mapping.
4. Setup of virtqueues for:
   - Control commands (no callback)
   - Completion queues (with callback for CQ events)
   - Send and receive queues for queue pairs (no callbacks)
5. Integration with the network device layer for RoCE support.
6. Registration with the InfiniBand core subsystem.
7. Comprehensive error handling during initialization and a symmetric
   teardown process.

Key features:
- Support for multiple virtqueues based on device capabilities (max_cq, max_qp)
- Fast doorbell optimization when notify_offset_multiplier equals PAGE_SIZE
- Safe resource management with rollback on failure

Signed-off-by: Xiong Weimin <xiongweimin@...inos.cn>
---
 linux-6.16.8/drivers/infiniband/Kconfig       |   1 +
 linux-6.16.8/drivers/infiniband/hw/Makefile   |   1 +
 .../drivers/infiniband/hw/virtio/Kconfig      |   6 +
 .../drivers/infiniband/hw/virtio/Makefile     |   5 +
 .../drivers/infiniband/hw/virtio/vrdma.h      |  82 ++++++
 .../drivers/infiniband/hw/virtio/vrdma_dev.c  | 272 ++++++++++++++++++
 .../drivers/infiniband/hw/virtio/vrdma_dev.h  |  16 ++
 .../infiniband/hw/virtio/vrdma_dev_api.h      | 116 ++++++++
 .../drivers/infiniband/hw/virtio/vrdma_ib.c   | 172 +++++++++++
 .../drivers/infiniband/hw/virtio/vrdma_ib.h   |  81 ++++++
 .../drivers/infiniband/hw/virtio/vrdma_main.c | 159 ++++++++++
 .../infiniband/hw/virtio/vrdma_netdev.c       | 105 +++++++
 .../infiniband/hw/virtio/vrdma_netdev.h       |  14 +
 .../infiniband/hw/virtio/vrdma_queue.c        |  21 ++
 .../infiniband/hw/virtio/vrdma_queue.h        |  14 +
 linux-6.16.8/include/rdma/vrdma_abi.h         |  62 ++++
 linux-6.16.8/include/uapi/linux/virtio_ids.h  |   1 +
 .../include/uapi/rdma/ib_user_ioctl_verbs.h   |   1 +
 18 files changed, 1129 insertions(+)
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/Kconfig
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/Makefile
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.c
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.h
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.h
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c
 create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h
 create mode 100644 linux-6.16.8/include/rdma/vrdma_abi.h

diff --git a/linux-6.16.8/drivers/infiniband/Kconfig b/linux-6.16.8/drivers/infiniband/Kconfig
index a5827d11e..9ba5f5628 100644
--- a/linux-6.16.8/drivers/infiniband/Kconfig
+++ b/linux-6.16.8/drivers/infiniband/Kconfig
@@ -94,6 +94,7 @@ source "drivers/infiniband/hw/ocrdma/Kconfig"
 source "drivers/infiniband/hw/qedr/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/usnic/Kconfig"
+source "drivers/infiniband/hw/virtio/Kconfig"
 source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 endif # !UML
diff --git a/linux-6.16.8/drivers/infiniband/hw/Makefile b/linux-6.16.8/drivers/infiniband/hw/Makefile
index aba96ca9b..63253a066 100644
--- a/linux-6.16.8/drivers/infiniband/hw/Makefile
+++ b/linux-6.16.8/drivers/infiniband/hw/Makefile
@@ -14,4 +14,5 @@ obj-$(CONFIG_INFINIBAND_HFI1)		+= hfi1/
 obj-$(CONFIG_INFINIBAND_HNS_HIP08)	+= hns/
 obj-$(CONFIG_INFINIBAND_QEDR)		+= qedr/
 obj-$(CONFIG_INFINIBAND_BNXT_RE)	+= bnxt_re/
+obj-$(CONFIG_INFINIBAND_VIRTIO_RDMA)	+= virtio/
 obj-$(CONFIG_INFINIBAND_ERDMA)		+= erdma/
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/Kconfig b/linux-6.16.8/drivers/infiniband/hw/virtio/Kconfig
new file mode 100644
index 000000000..a5624f98f
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/Kconfig
@@ -0,0 +1,6 @@
+config INFINIBAND_VIRTIO_RDMA
+	tristate "VirtIO Paravirtualized RDMA Driver"
+	depends on NETDEVICES && ETHERNET && PCI && INET && VIRTIO
+	help
+	  This driver provides low-level support for VirtIO Paravirtual
+	  RDMA adapter.
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/Makefile b/linux-6.16.8/drivers/infiniband/hw/virtio/Makefile
new file mode 100644
index 000000000..dbed6471e
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/Makefile
@@ -0,0 +1,5 @@
+# Build vrdma as a module or built-in according to CONFIG_INFINIBAND_VIRTIO_RDMA.
+obj-$(CONFIG_INFINIBAND_VIRTIO_RDMA) += vrdma.o
+
+vrdma-y := vrdma_main.o vrdma_dev.o vrdma_ib.o \
+	   vrdma_netdev.o vrdma_queue.o
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h
new file mode 100644
index 000000000..bc72d9c5e
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#ifndef __VIRTIO_RDMA_H__
+#define __VIRTIO_RDMA_H__
+
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/netdevice.h>
+#include <rdma/ib_verbs.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+
+/**
+ * struct vrdma_dev - Per-device state of the virtio RDMA driver
+ * @ib_dev:		Embedded IB core device, kept as the first member
+ * @attr:		Device attributes cached by init_device_attr()
+ * @netdev:		Optional net device; bound to port 1 at registration
+ * @vdev:		Virtio device backing this RDMA device
+ * @ctrl_vq:		Control virtqueue (first virtqueue, no callback)
+ * @ctrl_lock:		Spinlock protecting control operations
+ * @cq_vqs:		Array of max_cq completion virtqueue wrappers
+ * @cqs:		Array of max_cq pointers to completion queues
+ * @qp_vqs:		Array of 2 * max_qp wrappers: send at 2*i, receive at 2*i+1
+ * @num_qp:		Counter for active queue pairs
+ * @num_cq:		Counter for active completion queues
+ * @num_ah:		Counter for active address handles
+ * @pending_mmaps:	List of pending memory mappings for mmap handling
+ * @pending_mmaps_lock: Lock protecting pending_mmaps list
+ * @port_mutex:		Mutex for port state changes
+ * @port_cap_mask:	Port capabilities bitmask
+ * @ib_active:		Set once the virtqueues are up, cleared at teardown
+ * @fast_doorbell:	Enable fast doorbell mechanism (if supported)
+ */
+struct vrdma_dev {
+	/* Must come first; presumably required by the IB core helpers - confirm */
+	struct ib_device ib_dev;
+
+	/* Device attributes read from the virtio config space */
+	struct ib_device_attr attr;
+
+	/* Optional associated net device (set as the port 1 netdev when present) */
+	struct net_device *netdev;
+
+	/* Backend virtio device and control vq */
+	struct virtio_device *vdev;
+	struct virtqueue *ctrl_vq;
+
+	/* Lock for controlling access to ctrl_vq */
+	spinlock_t ctrl_lock;
+
+	/* Completion Queue (CQ) related, both sized by attr.max_cq */
+	struct vrdma_vq *cq_vqs;	/* Array of CQ VQs */
+	struct vrdma_cq **cqs;		/* Array of pointers to CQs */
+
+	/* Queue Pair (QP) related, two entries (send, receive) per QP */
+	struct vrdma_vq *qp_vqs;	/* Array of QP VQs */
+
+	/* Resource counters */
+	atomic_t num_qp;
+	atomic_t num_cq;
+	atomic_t num_ah;
+
+	/* Pending mmaps from userspace */
+	struct list_head pending_mmaps;
+	spinlock_t pending_mmaps_lock;
+
+	/* Port management */
+	struct mutex port_mutex;
+	u32 port_cap_mask;
+
+	/* Runtime state flags */
+	bool ib_active;
+	bool fast_doorbell;
+};
+
+#endif
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.c
new file mode 100644
index 000000000..0a09b3bd4
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#include <linux/virtio_config.h>
+
+#include "vrdma.h"
+#include "vrdma_dev_api.h"
+#include "vrdma_queue.h"
+
+/**
+ * init_device_attr - Initialize IB device attributes from virtio config space
+ * @rdev: Virtio RDMA device
+ *
+ * Reads the device configuration fields and populates the InfiniBand device
+ * attributes (&rdev->ib_dev.attrs). This function must be called during device
+ * probe after the virtqueue is ready but before registering the IB device.
+ */
+static void init_device_attr(struct vrdma_dev *rdev)
+{
+	struct ib_device_attr *attr = &rdev->attr;
+	struct vrdma_config cfg;
+
+	/* Zero out attribute structure */
+	memset(attr, 0, sizeof(*attr));
+
+	/* Read entire config at once for efficiency and atomicity */
+	virtio_cread(rdev->vdev, struct vrdma_config, phys_port_cnt, &cfg.phys_port_cnt);
+	virtio_cread(rdev->vdev, struct vrdma_config, sys_image_guid, &cfg.sys_image_guid);
+	virtio_cread(rdev->vdev, struct vrdma_config, vendor_id, &cfg.vendor_id);
+	virtio_cread(rdev->vdev, struct vrdma_config, vendor_part_id, &cfg.vendor_part_id);
+	virtio_cread(rdev->vdev, struct vrdma_config, hw_ver, &cfg.hw_ver);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_mr_size, &cfg.max_mr_size);
+	virtio_cread(rdev->vdev, struct vrdma_config, page_size_cap, &cfg.page_size_cap);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_qp, &cfg.max_qp);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_qp_wr, &cfg.max_qp_wr);
+	virtio_cread(rdev->vdev, struct vrdma_config, device_cap_flags, &cfg.device_cap_flags);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_send_sge, &cfg.max_send_sge);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_recv_sge, &cfg.max_recv_sge);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_sge_rd, &cfg.max_sge_rd);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_cq, &cfg.max_cq);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_cqe, &cfg.max_cqe);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_mr, &cfg.max_mr);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_pd, &cfg.max_pd);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_qp_rd_atom, &cfg.max_qp_rd_atom);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_res_rd_atom, &cfg.max_res_rd_atom);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_qp_init_rd_atom, &cfg.max_qp_init_rd_atom);
+	virtio_cread(rdev->vdev, struct vrdma_config, atomic_cap, &cfg.atomic_cap);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_mw, &cfg.max_mw);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_mcast_grp, &cfg.max_mcast_grp);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_mcast_qp_attach, &cfg.max_mcast_qp_attach);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_total_mcast_qp_attach, &cfg.max_total_mcast_qp_attach);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_ah, &cfg.max_ah);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_fast_reg_page_list_len, &cfg.max_fast_reg_page_list_len);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_pi_fast_reg_page_list_len, &cfg.max_pi_fast_reg_page_list_len);
+	virtio_cread(rdev->vdev, struct vrdma_config, max_pkeys, &cfg.max_pkeys);
+	virtio_cread(rdev->vdev, struct vrdma_config, local_ca_ack_delay, &cfg.local_ca_ack_delay);
+
+	/* Copy values into ib_device_attr with proper type conversion */
+	rdev->ib_dev.phys_port_cnt = le32_to_cpu(cfg.phys_port_cnt);
+
+	attr->sys_image_guid = le64_to_cpu(cfg.sys_image_guid);
+	attr->vendor_id = le32_to_cpu(cfg.vendor_id);
+	attr->vendor_part_id = le32_to_cpu(cfg.vendor_part_id);
+	attr->hw_ver = le32_to_cpu(cfg.hw_ver);
+	attr->max_mr_size = le64_to_cpu(cfg.max_mr_size);
+	attr->page_size_cap = le64_to_cpu(cfg.page_size_cap);
+	attr->max_qp = le32_to_cpu(cfg.max_qp);
+	attr->max_qp_wr = le32_to_cpu(cfg.max_qp_wr);
+	attr->device_cap_flags = le64_to_cpu(cfg.device_cap_flags);
+	attr->max_send_sge = le32_to_cpu(cfg.max_send_sge);
+	attr->max_recv_sge = le32_to_cpu(cfg.max_recv_sge);
+	attr->max_srq_sge = attr->max_send_sge; /* unless SRQ supported */
+	attr->max_sge_rd = le32_to_cpu(cfg.max_sge_rd);
+	attr->max_cq = le32_to_cpu(cfg.max_cq);
+	attr->max_cqe = le32_to_cpu(cfg.max_cqe);
+	attr->max_mr = le32_to_cpu(cfg.max_mr);
+	attr->max_pd = le32_to_cpu(cfg.max_pd);
+	attr->max_qp_rd_atom = le32_to_cpu(cfg.max_qp_rd_atom);
+	attr->max_res_rd_atom = le32_to_cpu(cfg.max_res_rd_atom);
+	attr->max_qp_init_rd_atom = le32_to_cpu(cfg.max_qp_init_rd_atom);
+	attr->atomic_cap = vrdma_atomic_cap_to_ib(le32_to_cpu(cfg.atomic_cap));
+	attr->max_mw = le32_to_cpu(cfg.max_mw);
+	attr->max_mcast_grp = le32_to_cpu(cfg.max_mcast_grp);
+	attr->max_mcast_qp_attach = le32_to_cpu(cfg.max_mcast_qp_attach);
+	attr->max_total_mcast_qp_attach = le32_to_cpu(cfg.max_total_mcast_qp_attach);
+	attr->max_ah = le32_to_cpu(cfg.max_ah);
+	attr->max_fast_reg_page_list_len = le32_to_cpu(cfg.max_fast_reg_page_list_len);
+	attr->max_pi_fast_reg_page_list_len = le32_to_cpu(cfg.max_pi_fast_reg_page_list_len);
+	attr->max_pkeys = le16_to_cpu(cfg.max_pkeys);
+	attr->local_ca_ack_delay = cfg.local_ca_ack_delay;
+}
+
+/**
+ * vrdma_init_device - Initialize virtqueues for a vRDMA device
+ * @dev: The vRDMA device to initialize
+ *
+ * Caches the device attributes, allocates the CQ/QP bookkeeping arrays in
+ * @dev and obtains 1 + max_cq + 2 * max_qp virtqueues from the virtio core
+ * (control, one per CQ, send and receive per QP). On failure every
+ * allocation made here is freed and the array pointers are reset to NULL.
+ *
+ * Returns 0 on success, or negative errno on failure.
+ */
+int vrdma_init_device(struct vrdma_dev *dev)
+{
+	struct virtqueue_info *vqs_info = NULL;
+	struct virtqueue **vqs = NULL;
+	unsigned int i, cur_vq;
+	unsigned int total_vqs;
+	uint32_t max_cq, max_qp;
+	int rc;
+
+	/* Initialize device attributes */
+	init_device_attr(dev);
+	max_cq = dev->attr.max_cq;
+	max_qp = dev->attr.max_qp; /* SRQ not supported, so ignored */
+
+	/* Both counts come from the device: refuse values that would wrap */
+	if (max_cq >= UINT_MAX || max_qp > (UINT_MAX - 1 - max_cq) / 2)
+		return -EINVAL;
+
+	/*
+	 * Total virtqueues:
+	 *   1 control queue (for verbs commands)
+	 *   max_cq completion queues (CQ)
+	 *   max_qp * 2 data queues (send & recv queue pairs per QP)
+	 */
+	total_vqs = 1 + max_cq + 2 * max_qp;
+
+	/*
+	 * Persistent arrays live in dev; vqs_info and vqs are only needed for
+	 * the virtio_find_vqs() call. This path already requires a sleepable
+	 * context, so GFP_KERNEL is used throughout. kfree(NULL) is a no-op,
+	 * which lets a single error label release whatever was allocated.
+	 */
+	dev->cq_vqs = kcalloc(max_cq, sizeof(*dev->cq_vqs), GFP_KERNEL);
+	dev->cqs = kcalloc(max_cq, sizeof(*dev->cqs), GFP_KERNEL);
+	dev->qp_vqs = kcalloc(2 * max_qp, sizeof(*dev->qp_vqs), GFP_KERNEL);
+	vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
+	vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
+	if (!dev->cq_vqs || !dev->cqs || !dev->qp_vqs || !vqs_info || !vqs) {
+		rc = -ENOMEM;
+		goto err_free;
+	}
+
+	/* Setup queue names and callbacks */
+	cur_vq = 0;
+
+	/* Control virtqueue (no callback) */
+	vqs_info[cur_vq].name = "vrdma-ctrl";
+	vqs_info[cur_vq].callback = NULL;
+	cur_vq++;
+
+	/* Completion Queue virtqueues */
+	for (i = 0; i < max_cq; i++) {
+		snprintf(dev->cq_vqs[i].name, sizeof(dev->cq_vqs[i].name),
+			 "cq.%u", i);
+		vqs_info[cur_vq].name = dev->cq_vqs[i].name;
+		vqs_info[cur_vq].callback = vrdma_cq_ack;
+		cur_vq++;
+	}
+
+	/* Send/Receive Queue Pairs for each QP */
+	for (i = 0; i < max_qp; i++) {
+		struct vrdma_vq *sq = &dev->qp_vqs[2 * i];
+		struct vrdma_vq *rq = &dev->qp_vqs[2 * i + 1];
+
+		snprintf(sq->name, sizeof(sq->name), "sqp.%u", i);
+		snprintf(rq->name, sizeof(rq->name), "rqp.%u", i);
+
+		vqs_info[cur_vq].name = sq->name;
+		vqs_info[cur_vq].callback = NULL; /* No TX callback */
+		vqs_info[cur_vq + 1].name = rq->name;
+		vqs_info[cur_vq + 1].callback = NULL; /* No RX callback */
+
+		cur_vq += 2;
+	}
+
+	/* Now ask VirtIO layer to set up the virtqueues */
+	rc = virtio_find_vqs(dev->vdev, total_vqs, vqs, vqs_info, NULL);
+	if (rc) {
+		pr_err("Failed to find %u virtqueues: %d\n", total_vqs, rc);
+		goto err_free;
+	}
+
+	/* Assign found virtqueues to device structures */
+	cur_vq = 0;
+	dev->ctrl_vq = vqs[cur_vq++];
+
+	for (i = 0; i < max_cq; i++) {
+		dev->cq_vqs[i].vq = vqs[cur_vq++];
+		dev->cq_vqs[i].idx = i;
+		spin_lock_init(&dev->cq_vqs[i].lock);
+	}
+
+	for (i = 0; i < max_qp; i++) {
+		struct vrdma_vq *sq = &dev->qp_vqs[2 * i];
+		struct vrdma_vq *rq = &dev->qp_vqs[2 * i + 1];
+
+		sq->vq = vqs[cur_vq++];
+		rq->vq = vqs[cur_vq++];
+
+		sq->idx = i;
+		rq->idx = i;
+
+		spin_lock_init(&sq->lock);
+		spin_lock_init(&rq->lock);
+	}
+
+	/* Final setup */
+	mutex_init(&dev->port_mutex);
+	dev->ib_active = true;
+
+	/* Both temporary arrays are done with once the vqs are assigned */
+	kfree(vqs_info);
+	kfree(vqs);
+
+	return 0;
+
+err_free:
+	kfree(vqs);
+	kfree(vqs_info);
+	kfree(dev->qp_vqs);
+	dev->qp_vqs = NULL;
+	kfree(dev->cqs);
+	dev->cqs = NULL;
+	kfree(dev->cq_vqs);
+	dev->cq_vqs = NULL;
+
+	return rc;
+}
+
+/* Undo vrdma_init_device(): quiesce, reset the device, free vqs and arrays. */
+void vrdma_finish_device(struct vrdma_dev *dev)
+{
+	if (!dev) {
+		pr_err("%s: invalid device pointer\n", __func__);
+		return;
+	}
+
+	if (!dev->vdev || !dev->vdev->config) {
+		pr_warn("%s: device or config is NULL, skipping teardown\n", __func__);
+		return;
+	}
+
+	/* Step 1: Mark device as inactive to prevent new operations */
+	dev->ib_active = false;
+
+	/*
+	 * Step 2: Wait for any current holder of port_mutex; nothing else to flush
+	 */
+	mutex_lock(&dev->port_mutex);
+	mutex_unlock(&dev->port_mutex);
+
+	/* Step 3: Reset via the virtio core helper, not config->reset() directly */
+	virtio_reset_device(dev->vdev);
+
+	/* Step 4: Delete all virtqueues */
+	dev->vdev->config->del_vqs(dev->vdev);
+
+	/* Step 5: Free the arrays and clear the pointers so none is left dangling */
+	kfree(dev->cq_vqs);
+	dev->cq_vqs = NULL;
+
+	kfree(dev->cqs);
+	dev->cqs = NULL;
+
+	kfree(dev->qp_vqs);
+	dev->qp_vqs = NULL;
+}
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.h
new file mode 100644
index 000000000..78e243faf
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+#ifndef __VRDMA_DEVICE_H__
+#define __VRDMA_DEVICE_H__
+
+#define VIRTIO_RDMA_BOARD_ID	1
+#define VIRTIO_RDMA_HW_NAME	"virtio-rdma"
+#define VIRTIO_RDMA_HW_REV	1
+#define VIRTIO_RDMA_DRIVER_VER	"1.0"
+
+int  vrdma_init_device(struct vrdma_dev *dev);
+void vrdma_finish_device(struct vrdma_dev *dev);
+
+#endif
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
new file mode 100644
index 000000000..403d5e820
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#ifndef __VRDMA_DEV_API_H__
+#define __VRDMA_DEV_API_H__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+
+#include <rdma/vrdma_abi.h>
+
+/**
+ * struct vrdma_config - Virtio RDMA device configuration
+ *
+ * Layout of the device's virtio configuration space; init_device_attr() reads
+ * these fields one by one and copies them into the cached IB attributes.
+ * All multi-byte fields are in little-endian byte order (__le* types).
+ */
+struct vrdma_config {
+	__le32		phys_port_cnt;			/* Number of physical ports */
+	/* NOTE(review): 8-byte aligned __le64 implies 4 pad bytes here and after hw_ver; confirm device layout */
+	__le64		sys_image_guid;			/* System image GUID */
+	__le32		vendor_id;				/* Vendor ID (PCI-style) */
+	__le32		vendor_part_id;			/* Vendor part/device ID */
+	__le32		hw_ver;					/* Hardware version */
+	__le64		max_mr_size;			/* Maximum memory region size */
+	__le64		page_size_cap;			/* Supported page sizes bitmask */
+	__le32		max_qp;					/* Max number of queue pairs */
+	__le32		max_qp_wr;				/* Max outstanding WRs per QP */
+	__le64		device_cap_flags;		/* Device capability flags (&enum ib_device_cap_flags) */
+	__le32		max_send_sge;			/* Max SGEs in a SEND WR */
+	__le32		max_recv_sge;			/* Max SGEs in a RECV WR */
+	__le32		max_sge_rd;				/* Max SGEs in an RDMA READ/ATOMIC WR */
+	__le32		max_cq;					/* Max number of completion queues */
+	__le32		max_cqe;				/* Max entries per CQ */
+	__le32		max_mr;					/* Max number of memory regions */
+	__le32		max_pd;					/* Max number of protection domains */
+	__le32		max_qp_rd_atom;			/* Max RDMA read atoms per QP */
+	__le32		max_res_rd_atom;		/* Total RDMA read atoms system-wide */
+	__le32		max_qp_init_rd_atom;	/* Max init RD atoms per QP */
+	__le32		atomic_cap;				/* VIRTIO_RDMA_ATOMIC_* code, see vrdma_atomic_cap_to_ib() */
+	__le32		max_mw;					/* Max number of memory windows */
+	__le32		max_mcast_grp;			/* Max multicast groups */
+	__le32		max_mcast_qp_attach;	/* Max QPs that can attach to one mcast group */
+	__le32		max_total_mcast_qp_attach;/* Total mcast attachments allowed */
+	__le32		max_ah;					/* Max address handles */
+	__le32		max_fast_reg_page_list_len;/* Max pages in a fast registration request */
+	__le32		max_pi_fast_reg_page_list_len;/* Max PI (protection info) pages */
+	__le16		max_pkeys;				/* Max P_Key table entries */
+	__u8		local_ca_ack_delay;		/* Local CA ACK delay, copied unchanged to ib_device_attr */
+	__u8		reserved[5];			/* Pad to 8-byte alignment before variable area */
+
+	/*
+	 * Future extension: place additional fields here before reserved_tail,
+	 * or use a TLV (type-length-value) mechanism for extensibility.
+	 */
+	__u8		reserved_tail[64];		/* Reserved for future use (must be zero) */
+};
+
+/**
+ * enum vrdma_verbs_cmd - Virtio RDMA verbs control commands
+ *
+ * Command identifiers for managing RDMA resources such as CQs, QPs and MRs.
+ * Values are assigned sequentially starting at 0, in declaration order.
+ *
+ * @VIRTIO_RDMA_CMD_ILLEGAL: Invalid or uninitialized command (must be 0)
+ * @VIRTIO_RDMA_CMD_QUERY_PORT: Query port attributes
+ * @VIRTIO_RDMA_CMD_CREATE_CQ: Create a Completion Queue (CQ)
+ * @VIRTIO_RDMA_CMD_DESTROY_CQ: Destroy an existing CQ
+ * @VIRTIO_RDMA_CMD_CREATE_PD: Create a Protection Domain (PD)
+ * @VIRTIO_RDMA_CMD_DESTROY_PD: Destroy a PD
+ * @VIRTIO_RDMA_CMD_GET_DMA_MR: Get a DMA memory region
+ * @VIRTIO_RDMA_CMD_CREATE_MR: Create a Memory Region (MR)
+ * @VIRTIO_RDMA_CMD_MAP_MR_SG: Map a scatter-gather list into an MR
+ * @VIRTIO_RDMA_CMD_REG_USER_MR: Register user-space memory
+ * @VIRTIO_RDMA_CMD_DEREG_MR: Deregister and destroy an MR
+ * @VIRTIO_RDMA_CMD_CREATE_QP: Create a Queue Pair (QP)
+ * @VIRTIO_RDMA_CMD_MODIFY_QP: Modify QP attributes/state
+ * @VIRTIO_RDMA_CMD_QUERY_QP: Retrieve current QP attributes
+ * @VIRTIO_RDMA_CMD_DESTROY_QP: Destroy a QP
+ * @VIRTIO_RDMA_CMD_QUERY_PKEY: Fetch a P_Key table entry
+ * @VIRTIO_RDMA_CMD_ADD_GID: Add a GID (Global Identifier) to the port
+ * @VIRTIO_RDMA_CMD_DEL_GID: Remove a GID from the port
+ * @VIRTIO_RDMA_CMD_REQ_NOTIFY_CQ: Request notification on a CQ event
+ *
+ * NOTE(review): presumably sent over the control virtqueue; the request and
+ * response framing is not defined in this header - confirm in the sender code.
+ */
+enum vrdma_verbs_cmd {
+	VIRTIO_RDMA_CMD_ILLEGAL = 0,
+
+	VIRTIO_RDMA_CMD_QUERY_PORT,
+	VIRTIO_RDMA_CMD_CREATE_CQ,
+	VIRTIO_RDMA_CMD_DESTROY_CQ,
+	VIRTIO_RDMA_CMD_CREATE_PD,
+	VIRTIO_RDMA_CMD_DESTROY_PD,
+	VIRTIO_RDMA_CMD_GET_DMA_MR,
+	VIRTIO_RDMA_CMD_CREATE_MR,
+	VIRTIO_RDMA_CMD_MAP_MR_SG,
+	VIRTIO_RDMA_CMD_REG_USER_MR,
+	VIRTIO_RDMA_CMD_DEREG_MR,
+	VIRTIO_RDMA_CMD_CREATE_QP,
+	VIRTIO_RDMA_CMD_MODIFY_QP,
+	VIRTIO_RDMA_CMD_QUERY_QP,
+	VIRTIO_RDMA_CMD_DESTROY_QP,
+	VIRTIO_RDMA_CMD_QUERY_PKEY,
+	VIRTIO_RDMA_CMD_ADD_GID,
+	VIRTIO_RDMA_CMD_DEL_GID,
+	VIRTIO_RDMA_CMD_REQ_NOTIFY_CQ,
+};
+
+
+#endif
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
new file mode 100644
index 000000000..379bd23d3
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#include <linux/scatterlist.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <rdma/ib_mad.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "vrdma.h"
+#include "vrdma_dev.h"
+#include "vrdma_dev_api.h"
+#include "vrdma_ib.h"
+
+/**
+ * cmd_str - String representation of virtio RDMA control commands
+ *
+ * Maps each &enum vrdma_verbs_cmd value to a human-readable string for logging
+ * and debugging. It is indexed directly by command ID, so callers must
+ * range-check the ID (e.g. against ARRAY_SIZE(cmd_str)) before indexing.
+ *
+ * Example: dev_dbg(dev, "Received ctrl cmd: %s\n", cmd_str[cmd]);
+ */
+static const char * const cmd_str[] = {
+	[VIRTIO_RDMA_CMD_ILLEGAL]		= "ILLEGAL",
+	[VIRTIO_RDMA_CMD_QUERY_PORT]	= "QUERY_PORT",
+	[VIRTIO_RDMA_CMD_CREATE_CQ]		= "CREATE_CQ",
+	[VIRTIO_RDMA_CMD_DESTROY_CQ]	= "DESTROY_CQ",
+	[VIRTIO_RDMA_CMD_CREATE_PD]		= "CREATE_PD",
+	[VIRTIO_RDMA_CMD_DESTROY_PD]	= "DESTROY_PD",
+	[VIRTIO_RDMA_CMD_GET_DMA_MR]	= "GET_DMA_MR",
+	[VIRTIO_RDMA_CMD_CREATE_MR]		= "CREATE_MR",
+	[VIRTIO_RDMA_CMD_MAP_MR_SG]		= "MAP_MR_SG",
+	[VIRTIO_RDMA_CMD_REG_USER_MR]	= "REG_USER_MR",
+	[VIRTIO_RDMA_CMD_DEREG_MR]		= "DEREG_MR",
+	[VIRTIO_RDMA_CMD_CREATE_QP]		= "CREATE_QP",
+	[VIRTIO_RDMA_CMD_MODIFY_QP]		= "MODIFY_QP",
+	[VIRTIO_RDMA_CMD_QUERY_QP]		= "QUERY_QP",
+	[VIRTIO_RDMA_CMD_DESTROY_QP]	= "DESTROY_QP",
+	[VIRTIO_RDMA_CMD_QUERY_PKEY]	= "QUERY_PKEY",
+	[VIRTIO_RDMA_CMD_ADD_GID]		= "ADD_GID",
+	[VIRTIO_RDMA_CMD_DEL_GID]		= "DEL_GID",
+	[VIRTIO_RDMA_CMD_REQ_NOTIFY_CQ]	= "REQ_NOTIFY_CQ",
+};
+
+static const struct ib_device_ops virtio_rdma_dev_ops = {	/* installed via ib_set_device_ops() */
+	.owner = THIS_MODULE,
+	.uverbs_abi_ver = VIRTIO_RDMA_ABI_VERSION,
+	.driver_id = RDMA_DRIVER_VIRTIO,	/* identification only; no verb callbacks are set here */
+};
+
+/**
+ * vrdma_register_ib_device - Register the vRDMA device with IB core
+ * @vrdev: The vRDMA device to register
+ *
+ * Sets the static ib_device fields, installs the device ops, binds the
+ * optional net_device to port 1 and registers the device with the IB core.
+ *
+ * Returns 0 on success, or negative errno.
+ */
+int vrdma_register_ib_device(struct vrdma_dev *vrdev)
+{
+	struct ib_device *ibdev;
+	int rc;
+
+	if (!vrdev) {
+		pr_err("Invalid vrdev pointer\n");
+		return -EINVAL;
+	}
+	ibdev = &vrdev->ib_dev;
+
+	/* --- Step 1: Initialize static device properties --- */
+	ibdev->dev.parent = &vrdev->vdev->dev;  /* Point to virtio device */
+	ibdev->node_type = RDMA_NODE_IB_CA;
+	strscpy(ibdev->node_desc, "VirtIO RDMA", sizeof(ibdev->node_desc));
+	ibdev->phys_port_cnt = 1;		 /* Assume single port */
+	ibdev->num_comp_vectors = 1;	  /* One completion vector */
+
+	/*
+	 * id.device is an integer, not a buffer: OR it with the locally-administered
+	 * bit (ULL, so the shift fits). NOTE(review): likely not unique per device.
+	 */
+	ibdev->node_guid = cpu_to_be64(vrdev->vdev->id.device | (0x020000ULL << 24));
+
+	/* --- Step 2: Set user verbs command mask --- */
+	ibdev->uverbs_cmd_mask =
+		BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
+		BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+		BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH);
+
+	/* --- Step 3: Attach device operation vectors --- */
+	ib_set_device_ops(ibdev, &virtio_rdma_dev_ops);
+
+	/* --- Step 4: Bind to netdev (optional, for RoCE) --- */
+	if (vrdev->netdev) {
+		rc = ib_device_set_netdev(ibdev, vrdev->netdev, 1); /* Port 1 */
+		if (rc) {
+			pr_err("Failed to bind net_device '%s': %d\n", vrdev->netdev->name, rc);
+			return rc;
+		}
+		pr_info("Bound vRDMA device to net_device '%s'\n", vrdev->netdev->name);
+	}
+
+	/* --- Step 5: Register with IB core --- */
+	rc = ib_register_device(ibdev, "vrdma%d", vrdev->vdev->dev.parent);
+	if (rc) {
+		pr_err("Failed to register vRDMA device with IB core: %d\n", rc);
+		return rc;
+	}
+	pr_info("Successfully registered vRDMA device as '%s'\n", dev_name(&ibdev->dev));
+	return 0;
+}
+
+/**
+ * vrdma_unregister_ib_device - Safely unregister IB device
+ * @vrdev: The vRDMA device to unregister
+ *
+ * Unregisters the IB device from the core first and only afterwards tears
+ * down the virtqueues and per-device arrays via vrdma_finish_device(), so
+ * that nothing is reset or freed while the IB core can still use the device.
+ */
+void vrdma_unregister_ib_device(struct vrdma_dev *vrdev)
+{
+	if (!vrdev) {
+		pr_err("%s: invalid vrdev\n", __func__);
+		return;
+	}
+
+	if (!vrdev->ib_dev.dev.parent) {
+		pr_warn("%s: IB device not registered or already unregistered\n", __func__);
+		return;
+	}
+
+	/*
+	 * Step 1: Unregister from IB core while the virtqueues still exist.
+	 * Resources released during unregistration may still need the
+	 * control virtqueue, so the device must not be reset before this.
+	 */
+	ib_unregister_device(&vrdev->ib_dev);
+
+	/*
+	 * Step 2: Stop the device: reset it, delete the virtqueues and free
+	 * the CQ/QP arrays. This runs last because the IB core no longer
+	 * references the device once ib_unregister_device() has returned.
+	 */
+	vrdma_finish_device(vrdev);
+}
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
new file mode 100644
index 000000000..9a7a0a168
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#ifndef __VRDMA_IB_H__
+#define __VRDMA_IB_H__
+
+#include <linux/types.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/vrdma_abi.h>
+
+/* Atomic capability codes translated by vrdma_atomic_cap_to_ib(). */
+enum {
+	VIRTIO_RDMA_ATOMIC_NONE = 0,
+	VIRTIO_RDMA_ATOMIC_HCA  = 1,
+	VIRTIO_RDMA_ATOMIC_GLOB = 2,
+};
+
+/**
+ * vrdma_atomic_cap_to_ib - Map a virtio-rdma atomic capability to the IB enum
+ * @src: One of the VIRTIO_RDMA_ATOMIC_* values
+ *
+ * Returns the matching IB_ATOMIC_* value. An unrecognized @src is logged and
+ * treated as IB_ATOMIC_NONE.
+ */
+static inline enum ib_atomic_cap vrdma_atomic_cap_to_ib(uint32_t src)
+{
+	switch (src) {
+	case VIRTIO_RDMA_ATOMIC_NONE:
+		return IB_ATOMIC_NONE;
+	case VIRTIO_RDMA_ATOMIC_HCA:
+		return IB_ATOMIC_HCA;
+	case VIRTIO_RDMA_ATOMIC_GLOB:
+		return IB_ATOMIC_GLOB;
+	default:
+		/* printk messages need the trailing newline to end the record */
+		pr_warn("Unknown atomic cap\n");
+		return IB_ATOMIC_NONE;
+	}
+}
+
+/**
+ * struct vrdma_vq - Wrapper around a virtqueue for RDMA use
+ * @vq: Pointer to the underlying virtqueue
+ * @lock: Spinlock to protect access to the virtqueue (especially ring updates)
+ * @name: Human-readable name (e.g., "send.0", "recv.1"); the buffer holds at
+ *        most 15 characters plus the terminating NUL
+ * @idx: Index of this queue within its type (e.g., queue pair ID)
+ *
+ * This structure wraps a virtqueue with additional metadata needed by the
+ * virtio-rdma driver, including synchronization and identification.
+ *
+ * NOTE(review): this header does not itself include <linux/spinlock.h> or
+ * <linux/virtio.h>; it presumably relies on <rdma/ib_verbs.h> or on the
+ * including .c file for spinlock_t and struct virtqueue - confirm.
+ */
+struct vrdma_vq {
+	struct virtqueue *vq;
+	spinlock_t lock;		/* Protects VQ operations */
+	char name[16];			/* Name for debugging */
+	int idx;				/* Queue index */
+};
+
+/**
+ * struct vrdma_cq - Virtio RDMA completion queue
+ * @ibcq: Embedding IB core CQ object (for RDMA ABI); vrdma_cq_ack() calls its
+ *        comp_handler with its cq_context
+ * @cq_handle: Host-visible handle to identify this CQ in virtio messages
+ * @vq: Associated receive virtqueue used to get completions from host
+ * @entry: Mmap entry for user-space mapping of CQ ring
+ * @lock: Protects concurrent access to CQ ring and state
+ * @queue: Kernel virtual address of the CQ ring (array of
+ *         struct virtio_rdma_cqe, declared in <rdma/vrdma_abi.h>)
+ * @queue_size: Total size of the CQ ring in bytes
+ * @dma_addr: DMA address of the CQ ring (for device access)
+ * @num_cqe: Number of CQE slots allocated in the ring
+ *
+ * The completion queue receives work completion notifications from the host.
+ * It is typically backed by a dedicated virtqueue that delivers CQEs.
+ *
+ * NOTE(review): @lock is separate from the lock inside @vq; which one guards
+ * which accesses is not visible from this header - confirm at the call sites.
+ */
+struct vrdma_cq {
+	struct ib_cq ibcq;
+	u32 cq_handle;
+	struct vrdma_vq *vq;  /* Virtqueue where CQEs arrive */
+	struct rdma_user_mmap_entry *entry; /* For mmap support in userspace */
+	spinlock_t lock;
+	struct virtio_rdma_cqe *queue;      /* CQE ring buffer */
+	size_t queue_size;
+	dma_addr_t dma_addr;
+	u32 num_cqe;
+};
+
+/*
+ * Forward declaration: only pointers to struct vrdma_dev are needed here, and
+ * declaring the tag keeps these prototypes independent of include order.
+ */
+struct vrdma_dev;
+
+int vrdma_register_ib_device(struct vrdma_dev *vrdev);
+void vrdma_unregister_ib_device(struct vrdma_dev *vrdev);
+
+#endif
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c
new file mode 100644
index 000000000..ea2f15491
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+#include <linux/virtio.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <uapi/linux/virtio_ids.h>
+
+#include "vrdma.h"
+#include "vrdma_dev.h"
+#include "vrdma_ib.h"
+#include "vrdma_netdev.h"
+
+#include "../../../virtio/virtio_pci_common.h"
+
+/**
+ * vrdma_probe - Bind the driver to a newly attached virtio RDMA device
+ * @vdev: VirtIO device structure
+ *
+ * Allocates the driver state together with its embedded ib_device, records
+ * whether the fast doorbell can be used, brings up the device queues, pairs
+ * with a net_device and finally registers with the IB core. Each failure
+ * unwinds the steps already completed, in reverse order.
+ *
+ * Returns 0 on success, or negative errno on failure.
+ */
+static int vrdma_probe(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev;
+	struct vrdma_dev *vrdev;
+	int ret;
+
+	/* vrdev embeds the ib_device, so one allocation provides both */
+	vrdev = ib_alloc_device(vrdma_dev, ib_dev);
+	if (!vrdev) {
+		pr_err("Failed to allocate vRDMA device\n");
+		return -ENOMEM;
+	}
+
+	/* Cross-link the virtio device and the driver state */
+	vdev->priv = vrdev;
+	vrdev->vdev = vdev;
+
+	INIT_LIST_HEAD(&vrdev->pending_mmaps);
+	spin_lock_init(&vrdev->pending_mmaps_lock);
+	spin_lock_init(&vrdev->ctrl_lock);
+
+	/* The fast doorbell is only enabled for a PAGE_SIZE notify multiplier */
+	vp_dev = to_vp_device(vdev);
+	vrdev->fast_doorbell =
+		vp_dev->mdev.notify_offset_multiplier == PAGE_SIZE;
+	if (!vrdev->fast_doorbell)
+		pr_warn("notify_offset_multiplier=%u != PAGE_SIZE, disabling fast doorbell\n",
+			vp_dev->mdev.notify_offset_multiplier);
+
+	/* Hardware interface (virtqueues) */
+	ret = vrdma_init_device(vrdev);
+	if (ret) {
+		pr_err("Failed to initialize vRDMA device queues\n");
+		goto out_dealloc;
+	}
+
+	/* Paired net_device */
+	ret = vrdma_init_netdev(vrdev);
+	if (ret) {
+		pr_err("Fail to connect to NetDev layer\n");
+		goto out_finish_device;
+	}
+
+	/* InfiniBand core registration */
+	ret = vrdma_register_ib_device(vrdev);
+	if (ret) {
+		pr_err("Failed to register with IB subsystem\n");
+		goto out_finish_netdev;
+	}
+
+	return 0;
+
+out_finish_netdev:
+	vrdma_finish_netdev(vrdev);
+out_finish_device:
+	vrdma_finish_device(vrdev);
+out_dealloc:
+	/* vrdev lives inside the ib_device allocation released here */
+	ib_dealloc_device(&vrdev->ib_dev);
+	vdev->priv = NULL;
+	return ret;
+}
+
+/**
+ * vrdma_remove - Tear down a virtio RDMA device
+ * @vdev: VirtIO device being removed
+ *
+ * Undoes vrdma_probe(): resets the device, unregisters from the IB core,
+ * drops the paired net_device reference, releases the device state and
+ * frees the ib_device allocation that holds the driver data.
+ */
+static void vrdma_remove(struct virtio_device *vdev)
+{
+	struct vrdma_dev *vrdev = vdev->priv;
+
+	if (!vrdev) {
+		dev_warn(&vdev->dev, "vrdma_remove: no private data!\n");
+		return;
+	}
+
+	/* Step 1: Prevent further access by clearing private pointer */
+	vdev->priv = NULL;
+
+	/*
+	 * Step 2: Reset the device. Go through virtio_reset_device() rather
+	 * than calling the transport's ->reset() hook directly, so the virtio
+	 * core's own handling around a reset is not bypassed.
+	 *
+	 * NOTE(review): the device is reset before the IB core is told to let
+	 * go of it; confirm no unregister-time callback still needs to talk to
+	 * the device.
+	 */
+	virtio_reset_device(vdev);
+
+	/* Step 3: Detach from the IB core */
+	vrdma_unregister_ib_device(vrdev);
+
+	/* Step 4: Release paired net_device reference (if any) */
+	vrdma_finish_netdev(vrdev);
+
+	/* Step 5: Clean up internal device state */
+	vrdma_finish_device(vrdev);
+
+	/* Step 6: Finally, free the ib_device structure itself */
+	ib_dealloc_device(&vrdev->ib_dev);
+}
+
+/* Match table: virtio RDMA devices from any vendor. Never modified at runtime. */
+static const struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_RDMA, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+/* virtio bus glue: match on id_table, bind/unbind through probe/remove. */
+static struct virtio_driver vrdma_driver = {
+	.driver = {
+		.name	= KBUILD_MODNAME,
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= id_table,
+	.probe		= vrdma_probe,
+	.remove		= vrdma_remove,
+};
+
+/* Module entry point: register the driver with the virtio bus. */
+static int __init vrdma_init(void)
+{
+	int err = register_virtio_driver(&vrdma_driver);
+
+	if (err)
+		pr_err("Failed to register VirtIO RDMA driver: error %d\n", err);
+
+	return err;
+}
+
+/* Module exit point: unregister the driver from the virtio bus. */
+static void __exit vrdma_finish(void)
+{
+	unregister_virtio_driver(&vrdma_driver);
+}
+
+/* Run vrdma_init() on module load and vrdma_finish() on module unload. */
+module_init(vrdma_init);
+module_exit(vrdma_finish);
+
+/* Publish id_table as the module's virtio device table. */
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_AUTHOR("Xiongweimin");
+MODULE_DESCRIPTION("Virtio RDMA driver");
+MODULE_LICENSE("Dual BSD/GPL");
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c
new file mode 100644
index 000000000..e83902e6d
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#include <linux/netdevice.h>
+#include <linux/pci_ids.h>
+#include <linux/virtio_ids.h>
+
+#include "../../../virtio/virtio_pci_common.h"
+#include "vrdma_netdev.h"
+#include "vrdma.h"
+
+/**
+ * vrdma_init_netdev - Attempt to find paired virtio-net device on same PCI slot
+ * @vrdev: The vRDMA device
+ *
+ * WARNING: This is a non-standard hack for development/emulation environments.
+ *          Do not use in production or upstream drivers.
+ *
+ * Looks up PCI function 0 in the slot of the vRDMA device, checks that it is
+ * the expected virtio-net device and derives its net_device from the virtio
+ * private data. On success a reference on the net_device is held and stored
+ * in @vrdev->netdev; vrdma_finish_netdev() drops it again. On failure
+ * @vrdev->netdev is left untouched, so no unreferenced pointer can later be
+ * handed to dev_put().
+ *
+ * Returns 0 on success, or negative errno.
+ */
+int vrdma_init_netdev(struct vrdma_dev *vrdev)
+{
+	struct virtio_pci_device *vp_dev;
+	struct virtio_pci_device *vnet_pdev;
+	struct net_device *netdev;
+	struct pci_dev *pdev_net;
+	void *priv;
+	int rc = -ENODEV;
+
+	if (!vrdev || !vrdev->vdev) {
+		pr_err("%s: invalid vrdev or vdev\n", __func__);
+		return -EINVAL;
+	}
+
+	vp_dev = to_vp_device(vrdev->vdev);
+
+	/* Find the PCI device at function 0 of the same slot */
+	pdev_net = pci_get_slot(vp_dev->pci_dev->bus,
+				PCI_DEVFN(PCI_SLOT(vp_dev->pci_dev->devfn), 0));
+	if (!pdev_net) {
+		pr_err("Failed to find PCI device at fn=0 of slot %x\n",
+		       PCI_SLOT(vp_dev->pci_dev->devfn));
+		return -ENODEV;
+	}
+
+	/* Only accept the device ID this code treats as virtio-net (0x1041) */
+	if (pdev_net->vendor != PCI_VENDOR_ID_REDHAT_QUMRANET ||
+	    pdev_net->device != 0x1041) {
+		pr_warn("PCI device %04x:%04x is not expected virtio-net (1041) device\n",
+			pdev_net->vendor, pdev_net->device);
+		goto out_put_pdev;
+	}
+
+	/* Get the virtio_pci_device from drvdata */
+	vnet_pdev = pci_get_drvdata(pdev_net);
+	if (!vnet_pdev || !vnet_pdev->vdev.priv) {
+		pr_err("No driver data or priv for virtio-net device\n");
+		goto out_put_pdev;
+	}
+
+	/*
+	 * NOTE(review): this assumes vdev.priv is the private area of the
+	 * virtio-net net_device and that it starts
+	 * ALIGN(sizeof(struct net_device), NETDEV_ALIGN) bytes after the
+	 * net_device itself - confirm against the target kernel's layout.
+	 * Nothing here prevents the virtio-net driver from unbinding while
+	 * its drvdata is being inspected.
+	 */
+	priv = vnet_pdev->vdev.priv;
+	netdev = priv - ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
+
+	if (!netdev || !netdev->netdev_ops) {
+		pr_err("Invalid net_device retrieved from virtio-net\n");
+		goto out_put_pdev;
+	}
+
+	/* Hold reference so netdev won't disappear; publish it only once held */
+	dev_hold(netdev);
+	vrdev->netdev = netdev;
+	rc = 0;
+
+out_put_pdev:
+	pci_dev_put(pdev_net);	/* Release reference from pci_get_slot */
+	return rc;
+}
+
+/**
+ * vrdma_finish_netdev - Drop the net_device reference held by a vRDMA device
+ * @vrdev: The vRDMA device
+ *
+ * Counterpart of vrdma_init_netdev(): puts the reference on @vrdev->netdev,
+ * if one is set, and clears the pointer. Meant to be called during teardown.
+ */
+void vrdma_finish_netdev(struct vrdma_dev *vrdev)
+{
+	struct net_device *ndev;
+
+	if (!vrdev) {
+		pr_err("%s: invalid vrdev pointer\n", __func__);
+		return;
+	}
+
+	ndev = vrdev->netdev;
+	if (!ndev) {
+		pr_debug("%s: no netdev to release\n", __func__);
+		return;
+	}
+
+	pr_info("[%s]: Releasing reference to net_device '%s'\n",
+		__func__, ndev->name);
+
+	dev_put(ndev);
+	vrdev->netdev = NULL;
+}
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.h
new file mode 100644
index 000000000..ce391b5bd
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#ifndef __VRDMA_NETDEV_H__
+#define __VRDMA_NETDEV_H__
+
+#include "vrdma.h"
+
+int  vrdma_init_netdev(struct vrdma_dev *vrdev);
+void vrdma_finish_netdev(struct vrdma_dev *vrdev);
+
+#endif
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c
new file mode 100644
index 000000000..78779c243
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+#include <linux/virtio.h>
+
+#include "vrdma.h"
+#include "vrdma_queue.h"
+
+/**
+ * vrdma_cq_ack - Signal the completion queue bound to a virtqueue
+ * @vq: The virtqueue that has new completions
+ *
+ * Looks up the CQ associated with @vq and invokes its completion handler, if
+ * one is installed. Virtqueue 0 is the control queue, so the CQ for
+ * virtqueue n is stored at cqs[n - 1]. Presumably installed as a virtqueue
+ * callback - confirm where the virtqueues are created.
+ */
+void vrdma_cq_ack(struct virtqueue *vq)
+{
+	struct vrdma_dev *rdev = vq->vdev->priv;
+	struct vrdma_cq *vcq;
+
+	/*
+	 * vrdma_remove() clears vdev->priv before resetting the device, so a
+	 * late call can observe a NULL private pointer. Index 0 has no CQ
+	 * slot and would underflow the cqs[] lookup below.
+	 */
+	if (!rdev || vq->index == 0)
+		return;
+
+	vcq = rdev->cqs[vq->index - 1];
+	if (vcq && vcq->ibcq.comp_handler)
+		vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+}
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h
new file mode 100644
index 000000000..64b896208
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+
+#ifndef __VRDMA_QUEUE_H__
+#define __VRDMA_QUEUE_H__
+
+#include "vrdma_ib.h"
+#include "vrdma_dev_api.h"
+
+void vrdma_cq_ack(struct virtqueue *vq);
+
+#endif
\ No newline at end of file
diff --git a/linux-6.16.8/include/rdma/vrdma_abi.h b/linux-6.16.8/include/rdma/vrdma_abi.h
new file mode 100644
index 000000000..62d4fda09
--- /dev/null
+++ b/linux-6.16.8/include/rdma/vrdma_abi.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020.kylinos.cn.All Rights Reserved.*/
+#ifndef __VIRTIO_RDMA_ABI_H__
+#define __VIRTIO_RDMA_ABI_H__
+
+#include <linux/types.h>
+
+#define VIRTIO_RDMA_ABI_VERSION 1
+
+/**
+ * struct virtio_rdma_cqe - Virtio RDMA completion queue entry (CQE)
+ * @wr_id: User-provided Work Request ID (passed back on completion)
+ * @status: Completion status (%IB_WC_SUCCESS or error code)
+ * @opcode: Operation type (e.g., %IB_WC_SEND, %IB_WC_RECV)
+ * @vendor_err: Vendor-specific error code (if any)
+ * @byte_len: Number of bytes transferred in this operation
+ * @ex: Union containing additional data based on operation:
+ *      - @imm_data: Inbound immediate data (for sends with IMM)
+ *      - @invalidate_rkey: RKEY invalidated in remote invalidation
+ * @qp_num: QP number that completed this work (lower 24 bits)
+ * @src_qp: Source QP number from sender (in RC/UC)
+ * @wc_flags: Additional flags (e.g., %IB_WC_WITH_IMM, %IB_WC_GRH, %IB_WC_COMPLETION_TIMESTAMP)
+ * @pkey_index: P_Key index used for this packet
+ * @slid: Source LID (Local Identifier) of the sender
+ * @sl: Service Level used in the packet
+ * @dlid_path_bits: Path bits of the destination LID
+ * @port_num: Physical port number on which the packet was received
+ * @reserved: Explicit tail padding; not used to carry data
+ *
+ * This structure represents a single completion entry delivered to a CQ.
+ * It mirrors the fields of &struct ib_wc but is designed to be serialized
+ * over the virtio control channel or ring buffer.
+ *
+ * Every field sits at its natural alignment. The named fields end at byte
+ * 47, and @reserved pads the structure out to 56 bytes, a multiple of 8, so
+ * the size does not depend on compiler-inserted tail padding (which differs
+ * between ABIs that align __u64 to 4 and to 8 bytes).
+ *
+ * NOTE(review): the fields use native-endian __u32/__u16 types; confirm
+ * whether the device side expects fixed little-endian values.
+ */
+struct virtio_rdma_cqe {
+	__u64		wr_id;			/* Work Request ID */
+	__u32		status;			/* IB_WC_* status code */
+	__u32		opcode;			/* IB_WC_* opcode */
+	__u32		vendor_err;		/* Vendor-specific error */
+	__u32		byte_len;		/* Bytes transferred */
+
+	union {
+		__u32	imm_data;		/* Immediate data (if present) */
+		__u32	invalidate_rkey;	/* RKEY invalidated */
+	} ex;
+
+	__u32		qp_num;			/* Local QP number */
+	__u32		src_qp;			/* Remote source QP */
+	__u32		wc_flags;		/* IB_WC_* flags (e.g., WITH_IMM, GRH) */
+
+	/* Connection and routing metadata */
+	__u16		pkey_index;		/* P_Key table index */
+	__u16		slid;			/* Source LID */
+	__u8		sl;			/* Service Level */
+	__u8		dlid_path_bits;		/* DLID path bits (for subnet routing) */
+	__u8		port_num;		/* Port where packet was received */
+	__u8		reserved[9];		/* Pad 47 -> 56 bytes (multiple of 8) */
+};
+
+#endif
\ No newline at end of file
diff --git a/linux-6.16.8/include/uapi/linux/virtio_ids.h b/linux-6.16.8/include/uapi/linux/virtio_ids.h
index 7aa2eb766..ff2d0b01b 100644
--- a/linux-6.16.8/include/uapi/linux/virtio_ids.h
+++ b/linux-6.16.8/include/uapi/linux/virtio_ids.h
@@ -68,6 +68,7 @@
 #define VIRTIO_ID_AUDIO_POLICY		39 /* virtio audio policy */
 #define VIRTIO_ID_BT			40 /* virtio bluetooth */
 #define VIRTIO_ID_GPIO			41 /* virtio gpio */
+#define VIRTIO_ID_RDMA			42 /* virtio rdma */
 
 /*
  * Virtio Transitional IDs
diff --git a/linux-6.16.8/include/uapi/rdma/ib_user_ioctl_verbs.h b/linux-6.16.8/include/uapi/rdma/ib_user_ioctl_verbs.h
index fe15bc7e9..181978aa9 100644
--- a/linux-6.16.8/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/linux-6.16.8/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -255,6 +255,7 @@ enum rdma_driver_id {
 	RDMA_DRIVER_SIW,
 	RDMA_DRIVER_ERDMA,
 	RDMA_DRIVER_MANA,
+	RDMA_DRIVER_VIRTIO,
 };
 
 enum ib_uverbs_gid_type {
-- 
2.43.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ