Message-ID: <20251218091050.55047-4-15927021679@163.com>
Date: Thu, 18 Dec 2025 17:09:43 +0800
From: Xiong Weimin <15927021679@....com>
To: "Michael S . Tsirkin" <mst@...hat.com>,
David Hildenbrand <david@...hat.com>,
Jason Wang <jasowang@...hat.com>,
Stefano Garzarella <sgarzare@...hat.com>,
Thomas Monjalon <thomas@...jalon.net>,
David Marchand <david.marchand@...hat.com>,
Luca Boccassi <bluca@...ian.org>,
Kevin Traynor <ktraynor@...hat.com>,
Christian Ehrhardt <christian.ehrhardt@...onical.com>,
Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Eugenio Pérez <eperezma@...hat.com>,
Xueming Li <xuemingl@...dia.com>,
Maxime Coquelin <maxime.coquelin@...hat.com>,
Chenbo Xia <chenbox@...dia.com>,
Bruce Richardson <bruce.richardson@...el.com>
Cc: kvm@...r.kernel.org,
virtualization@...ts.linux.dev,
netdev@...r.kernel.org,
xiongweimin <xiongweimin@...inos.cn>
Subject: [PATCH 03/10] drivers/infiniband/hw/virtio: Implement core device and key resource management
From: xiongweimin <xiongweimin@...inos.cn>
This commit consolidates the foundational implementation of the vhost-user RDMA
device driver, including:
1. Core Device Initialization:
- DPDK EAL setup with POSIX signal handling
- NUMA-aware resource allocation (packet pools, ring buffers)
- Backend netdev auto-detection (net_tap/net_vhost)
- Multi-device support with isolated RX/TX resources
- vHost-user protocol feature negotiation
2. RDMA Control Path:
- Device capability queries (VHOST_RDMA_CTRL_ROCE_QUERY_DEVICE)
- Port attribute reporting (VHOST_RDMA_CTRL_ROCE_QUERY_PORT)
- Scatterlist helpers for vmalloc/linear buffers
- Atomic memory handling for interrupt contexts
3. Resource Management:
- Protection Domains (PD) allocation/destruction
- Completion Queues (CQ) creation/destruction with:
* Kernel-mode pre-posted buffers
* Userspace mmap support for zero-copy polling
* DMA-coherent ring buffers
- Queue Pairs (QP) creation/destruction with:
* Dual-mode support (kernel/userspace)
* Dynamic WQE buffer sizing
* Doorbell register mapping
- Global bitmap-based object pools (PD/CQ/QP/AH/MR)
4. Userspace Integration:
- Detailed mmap structures for SQ/RQ rings
- Atomic counters for resource tracking
- Comprehensive error handling paths
- ABI-compliant uresponse structures
The implementation features:
- Device/port attribute reporting compliant with IB specifications
- Per-resource reference counting
- Graceful resource cleanup during destruction
- Support for both kernel and userspace memory models
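As an illustration of the userspace integration, the intended CQ polling flow
looks roughly like the sketch below (field names follow the
vrdma_create_cq_uresp structure added by this patch; cmd_fd stands in for the
uverbs command fd and is an assumption of the sketch, not part of the ABI):
    void *base = mmap(NULL, uresp.cq_size, PROT_READ, MAP_SHARED,
                      cmd_fd, uresp.offset);
    struct vrdma_cqe *cqe_ring = base;
    struct vring_used *used = base + uresp.used_off;
    /* poll 'used' for new entries, then read completions from cqe_ring */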
Signed-off-by: Xiong Weimin <xiongweimin@...inos.cn>
---
.../drivers/infiniband/hw/virtio/vrdma.h | 5 +
.../drivers/infiniband/hw/virtio/vrdma_abi.h | 279 ++++
.../infiniband/hw/virtio/vrdma_dev_api.h | 46 +
.../drivers/infiniband/hw/virtio/vrdma_ib.c | 1178 +++++++++++++++--
.../drivers/infiniband/hw/virtio/vrdma_ib.h | 106 +-
.../drivers/infiniband/hw/virtio/vrdma_main.c | 86 +-
.../drivers/infiniband/hw/virtio/vrdma_mmap.h | 88 ++
.../infiniband/hw/virtio/vrdma_netdev.c | 130 +-
.../infiniband/hw/virtio/vrdma_queue.c | 110 ++
.../infiniband/hw/virtio/vrdma_queue.h | 3 +-
linux-6.16.8/include/rdma/ib_verbs.h | 9 +
11 files changed, 1806 insertions(+), 234 deletions(-)
create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_abi.h
create mode 100644 linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_mmap.h
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h
index a646794ef..99909446f 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma.h
@@ -80,4 +80,9 @@ struct vrdma_dev {
bool fast_doorbell;
};
+static inline struct vrdma_dev *to_vdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct vrdma_dev, ib_dev);
+}
+
#endif
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_abi.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_abi.h
new file mode 100644
index 000000000..7cdc4e488
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_abi.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright 2020 kylinos.cn. All Rights Reserved. */
+#ifndef __VRDMA_ABI_H__
+#define __VRDMA_ABI_H__
+
+#include <linux/types.h>
+
+#define VRDMA_ABI_VERSION 1
+
+/**
+ * struct vrdma_cqe - Virtio-RDMA Completion Queue Entry (CQE)
+ *
+ * This structure represents a single completion entry in the Completion Queue (CQ).
+ * It is written by the kernel driver (or directly by the backend via shared memory)
+ * when a Work Request (WR) completes, and is read by userspace applications during
+ * polling or event handling.
+ *
+ * The layout matches the semantics of `struct ib_wc` but is exposed to userspace
+ * for zero-copy access. All fields use native byte order (little-endian assumed),
+ * as virtio is inherently little-endian.
+ *
+ * @wr_id: User-provided WR identifier, copied from the original send/receive request.
+ * Used to correlate completions with outstanding operations.
+ * @status: Completion status (e.g., IB_WC_SUCCESS, IB_WC_RETRY_EXC_ERR, etc.).
+ * See &enum ib_wc_status.
+ * @opcode: Operation type that completed (e.g., IB_WC_SEND, IB_WC_RECV, IB_WC_RDMA_WRITE).
+ * See &enum ib_wc_opcode.
+ * @vendor_err: Vendor-specific error code (if any). Typically 0 on success.
+ * @byte_len: Number of bytes transferred in this operation.
+ * @ex: Union containing additional data based on operation type:
+ * - @imm_data: Immediate data received in a SEND with immediate flag.
+ * - @invalidate_rkey: RKEY invalidated by a SEND with Invalidate operation.
+ * @qp_num: Source QP number (for incoming completions, this is the remote QP).
+ * @src_qp: Alias for @qp_num; kept for symmetry with IB core naming.
+ * @wc_flags: Bitmask of completion flags (e.g., IB_WC_GRH, IB_WC_WITH_IMM, IB_WC_COMPLETION_TIMESTAMP).
+ * See &enum ib_wc_flags.
+ * @pkey_index: Partition Key index used for this packet (local context).
+ * @slid: Source LID (16-bit), valid only for IB transport if GRH not present.
+ * @sl: Service Level (4 bits), extracted from the packet header.
+ * @dlid_path_bits: Encodes either DLID path bits (in RoCE/IB) or switch path information.
+ * @port_num: Physical port number on which the packet was received.
+ *
+ * Note:
+ * This structure is mapped into userspace via mmap() along with the CQ ring buffer.
+ * Applications poll this array for new completions without system calls.
+ *
+ * Memory Layout Example:
+ *
+ * struct vrdma_cqe cq_ring[N];
+ *
+ * while (polling) {
+ * struct vrdma_cqe *cqe = &cq_ring[head];
+ * if (cqe->status == VZ80_CQ_STATUS_EMPTY)
+ * break; // no more completions
+ *
+ * process_completion(cqe);
+ * cqe->status = VZ80_CQ_STATUS_PROCESSED; // optional acknowledgment
+ * head = (head + 1) % N;
+ * }
+ *
+ * Alignment: Must be aligned to 8-byte boundary. Size: typically 64 bytes.
+ */
+struct vrdma_cqe {
+ __u64 wr_id; /* [out] User-defined WR ID */
+ __u32 status; /* [out] Status of the completed WR */
+ __u32 opcode; /* [out] Type of operation completed */
+ __u32 vendor_err; /* [out] Vendor-specific error code */
+ __u32 byte_len; /* [out] Number of bytes transferred */
+
+ union {
+ __u32 imm_data; /* [out] Immediate data (if IBV_WC_WITH_IMM) */
+ __u32 invalidate_rkey; /* [out] RKEY invalidated (if IBV_WC_WITH_INVALIDATE) */
+ } ex;
+
+ __u32 qp_num; /* [out] Remote QP number (source QP) */
+ __u32 src_qp; /* [out] Alias of qp_num for clarity */
+ int wc_flags; /* [out] Flags (e.g., IB_WC_GRH, IB_WC_WITH_IMM) */
+
+ __u16 pkey_index; /* [out] P_Key index used */
+ __u16 slid; /* [out] Source LID (16-bit) */
+ __u8 sl; /* [out] Service Level */
+ __u8 dlid_path_bits; /* [out] DLID path bits / switch routing info */
+ __u8 port_num; /* [out] Port number where packet was received */
+ __u8 reserved[3]; /* Pad to maintain 8-byte alignment */
+};
+
+/**
+ * struct vrdma_create_cq_uresp - Response to userspace on CQ creation with mmap support
+ * @offset: File offset to be used in mmap() for mapping the CQ and vring.
+ * Passed back from kernel via rdma_user_mmap_get_offset().
+ * Userspace does: mmap(0, size, PROT_READ, MAP_SHARED, fd, offset);
+ * @cq_size: Total size of the mapped region, including:
+ * - CQ event ring (array of struct vrdma_cqe)
+ * - Virtqueue structure (descriptor table, available, used rings)
+ * Must be page-aligned.
+ * @cq_phys_addr: Physical address of the CQ ring buffer (optional).
+ * May be used by userspace for debugging or memory inspection tools.
+ * @used_off: Offset of the "used" ring within the virtqueue's memory layout.
+ * Calculated as: used_ring_addr - desc_table_addr.
+ * Allows userspace to directly map and read completions without syscalls.
+ * @vq_size: Size of the entire virtqueue (including padding), page-aligned.
+ * Used by userspace to determine how much extra memory to map beyond CQ ring.
+ * @num_cqe: Number of CQ entries (completion queue depth) allocated.
+ * Useful for bounds checking in userspace.
+ * @num_cvqe: Number of completion virtqueue elements (i.e., size of vring).
+ * Corresponds to virtqueue_get_vring_size(vcq->vq->vq).
+ * Indicates how many completion events can be queued.
+ *
+ * This structure is passed from kernel to userspace via ib_copy_to_udata()
+ * during CQ creation when a user context is provided. It enables zero-copy,
+ * polling-based completion handling by allowing userspace to directly access:
+ * - The CQ event ring (for reading work completions)
+ * - The virtqueue used ring (to detect when device has posted new completions)
+ *
+ * Memory Layout After mmap():
+ *
+ * +---------------------+
+ * | CQ Event Ring | <- Mapped at base addr
+ * | (num_cqe entries) |
+ * +---------------------+
+ * | Virtqueue: |
+ * | - Desc Table |
+ * | - Available Ring |
+ * | - Used Ring | <- Accessed via (base + used_off)
+ * +---------------------+
+ *
+ * Example usage in userspace:
+ *
+ * void *addr = mmap(NULL, uresp.cq_size, PROT_READ, MAP_SHARED,
+ * ctx->cmd_fd, uresp.offset);
+ * struct vrdma_cqe *cqe_ring = addr;
+ * struct vring_used *used_ring = addr + uresp.used_off;
+ */
+struct vrdma_create_cq_uresp {
+ __u64 offset; /* mmap offset for userspace */
+ __u64 cq_size; /* total size to map (CQ + vring) */
+ __u64 cq_phys_addr; /* physical address of CQ ring (hint) */
+ __u64 used_off; /* offset to used ring inside vring */
+ __u32 vq_size; /* size of the virtqueue (aligned) */
+ int num_cqe; /* number of CQ entries */
+ int num_cvqe; /* number of completion VQ descriptors */
+};
+
+struct vrdma_alloc_pd_uresp {
+ __u32 pdn;
+};
+
+/**
+ * struct vrdma_create_qp_uresp - User response for QP creation in virtio-rdma
+ * @sq_mmap_offset: Offset to mmap the Send Queue (SQ) ring buffer
+ * @sq_mmap_size: Size of the SQ ring buffer available for mmap
+ * @sq_db_addr: Physical address (or token) for SQ doorbell register access
+ * @svq_used_idx_off: Offset within SQ mmap where used index is stored (polling support)
+ * @svq_ring_size: Number of entries in the backend's send virtqueue
+ * @num_sq_wqes: Maximum number of SQ WQEs this QP can post
+ * @sq_head_idx: Current head index in kernel's SQ ring (optional debug info)
+
+ * @rq_mmap_offset: Offset to mmap the Receive Queue (RQ) ring buffer
+ * @rq_mmap_size: Size of the RQ ring buffer available for mmap
+ * @rq_db_addr: Physical address (or token) for RQ doorbell register access
+ * @rvq_used_idx_off: Offset within RQ mmap where used index is stored
+ * @rvq_ring_size: Number of entries in the backend's receive virtqueue
+ * @num_rq_wqes: Maximum number of RQ WQEs this QP can post
+ * @rq_head_idx: Current head index in kernel's RQ ring
+
+ * @notifier_size: Size of notification area (e.g., CQ notifier, event counter)
+ * @qp_handle: Unique identifier for this QP (qpn)
+ *
+ * This structure is passed back to userspace via `ib_copy_to_udata()`
+ * during QP creation. It allows userspace to:
+ * - Map SQ/RQ rings into its address space
+ * - Access doorbells directly (if supported)
+ * - Poll for completion status via used index
+ */
+struct vrdma_create_qp_uresp {
+ __u64 sq_mmap_offset;
+ __u64 sq_mmap_size;
+ __u64 sq_db_addr;
+ __u64 svq_used_idx_off;
+ __u32 svq_ring_size;
+ __u32 num_sq_wqes;
+ __u32 num_svqe;
+ __u32 sq_head_idx;
+
+ __u64 rq_mmap_offset;
+ __u64 rq_mmap_size;
+ __u64 rq_db_addr;
+ __u64 rvq_used_idx_off;
+ __u32 rvq_ring_size;
+ __u32 num_rq_wqes;
+ __u32 num_rvqe;
+ __u32 rq_head_idx;
+
+ __u32 notifier_size;
+
+ __u32 qp_handle;
+};
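+
+/*
+ * Illustrative userspace usage (a sketch only; cmd_fd and error handling are
+ * assumptions of the example, not part of this ABI):
+ *
+ *	struct vrdma_create_qp_uresp uresp;	// filled by ib_copy_to_udata()
+ *	void *sq = mmap(NULL, uresp.sq_mmap_size, PROT_READ | PROT_WRITE,
+ *			MAP_SHARED, cmd_fd, uresp.sq_mmap_offset);
+ *	void *rq = mmap(NULL, uresp.rq_mmap_size, PROT_READ | PROT_WRITE,
+ *			MAP_SHARED, cmd_fd, uresp.rq_mmap_offset);
+ *	// WQEs are written into the mapped rings; progress is observed via
+ *	// the used index at the svq_used_idx_off/rvq_used_idx_off offsets.
+ */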
+
+/**
+ * struct vrdma_av - Address Vector for Virtio-RDMA QP routing
+ *
+ * An Address Vector (AV) contains L2/L3 network path information used to
+ * route packets from a UD or RC QP to a remote destination. It is analogous
+ * to InfiniBand's AV structure in user verbs.
+ *
+ * All fields use fixed-width types for ABI stability across architectures.
+ */
+struct vrdma_av {
+ __u32 port:8; /* Physical port index (1-based) */
+	__u32 pdn:8;			/* Protection Domain Number (PD handle) */
+ __u32 sl_tclass_flowlabel:16; /* Combined SL (4), TClass (8), Flow Label (20) */
+
+ __u8 dgid[16]; /* Destination Global Identifier (GID), big-endian */
+
+ __u8 gid_index; /* Outbound GID table index (for source GID selection) */
+ __u8 stat_rate; /* Static rate control (enum ibv_rate) */
+ __u8 hop_limit; /* IPv6-style hop limit / TTL */
+ __u8 dmac[6]; /* Destination MAC address (for L2 forwarding) */
+
+ __u8 reserved[6]; /* Reserved for future use / alignment padding */
+};
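+
+/*
+ * Example of filling an AV for a RoCE-style destination (illustrative
+ * placeholder values; remote_gid and remote_mac are assumed inputs):
+ *
+ *	struct vrdma_av av = {
+ *		.port		= 1,
+ *		.pdn		= pd_handle,
+ *		.gid_index	= 0,
+ *		.hop_limit	= 64,
+ *	};
+ *	memcpy(av.dgid, remote_gid, 16);
+ *	memcpy(av.dmac, remote_mac, 6);
+ */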
+
+/**
+ * struct vrdma_cmd_post_send - User-space command to post a Send WQE
+ *
+ * This structure is passed from userspace via ioctl (e.g., WRITE on uverbs char dev)
+ * to request posting one or more work queue entries (WQEs) on the Send Queue (SQ).
+ * It mirrors the semantics of `ibv_post_send()` in libibverbs.
+ *
+ * All fields use fixed-size types for ABI stability across architectures.
+ */
+struct vrdma_cmd_post_send {
+ __u32 num_sge; /* Number of scatter-gather elements in this WQE */
+
+ __u32 send_flags; /* IBV_SEND_xxx flags (e.g., signaled, inline, fence) */
+ __u32 opcode; /* Operation code: RDMA_WRITE, SEND, ATOMIC, etc. */
+ __u64 wr_id; /* Work Request ID returned in CQE */
+
+ union {
+ __be32 imm_data; /* Immediate data for RC/UC QPs */
+ __u32 invalidate_rkey; /* rkey to invalidate (on SEND_WITH_INV) */
+ } ex;
+
+ union wr_data {
+ struct {
+ __u64 remote_addr; /* Target virtual address for RDMA op */
+ __u32 rkey; /* Remote key for memory access */
+ } rdma;
+
+ struct {
+ __u64 remote_addr; /* Address of atomic variable */
+ __u64 compare_add; /* Value to compare */
+ __u64 swap; /* Value to swap (or add) */
+ __u32 rkey; /* Remote memory key */
+ } atomic;
+
+ struct {
+ __u32 remote_qpn; /* Destination QP number */
+ __u32 remote_qkey; /* Q_Key for UD packet validation */
+ struct vrdma_av av; /* Address vector (L2/L3 info) */
+ } ud;
+
+ struct {
+ __u32 mrn; /* Memory Region Number (MR handle) */
+ __u32 key; /* Staging rkey for MR registration */
+ __u32 access; /* Access flags (IB_ACCESS_xxx) */
+ } reg;
+ } wr;
+};
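+
+/*
+ * Example of encoding an RDMA WRITE work request (illustrative values only;
+ * raddr and rkey are assumed inputs). The WQE is followed in the ring by
+ * num_sge entries of struct vrdma_sge, defined below:
+ *
+ *	struct vrdma_cmd_post_send wqe = {
+ *		.num_sge	= 1,
+ *		.send_flags	= IBV_SEND_SIGNALED,
+ *		.opcode		= IBV_WR_RDMA_WRITE,
+ *		.wr_id		= 0x1234,
+ *		.wr.rdma	= { .remote_addr = raddr, .rkey = rkey },
+ *	};
+ */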
+
+struct vrdma_sge {
+ __u64 addr;
+ __u32 length;
+ __u32 lkey;
+};
+
+#endif
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
index 3b1f7d2b6..d1db1bea4 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
@@ -114,6 +114,52 @@ enum vrdma_verbs_cmd {
VIRTIO_RDMA_CMD_REQ_NOTIFY_CQ,
};
+struct vrdma_cmd_query_port {
+ u32 port;
+};
+
+struct vrdma_cmd_create_cq {
+ u32 cqe;
+};
+
+struct vrdma_rsp_create_cq {
+ u32 cqn;
+};
+
+struct vrdma_cmd_destroy_cq {
+ u32 cqn;
+};
+
+struct vrdma_rsp_create_pd {
+ __u32 pdn;
+};
+
+struct vrdma_cmd_destroy_pd {
+ __u32 pdn;
+};
+
+struct vrdma_cmd_create_qp {
+ __u32 pdn;
+ __u8 qp_type;
+ __u8 sq_sig_type;
+ __u32 max_send_wr;
+ __u32 max_send_sge;
+ __u32 send_cqn;
+ __u32 max_recv_wr;
+ __u32 max_recv_sge;
+ __u32 recv_cqn;
+
+ __u32 max_inline_data;
+};
+
+struct vrdma_rsp_create_qp {
+ __u32 qpn;
+};
+
+struct vrdma_cmd_destroy_qp {
+ __u32 qpn;
+};
+
#define VRDMA_CTRL_OK 0
#define VRDMA_CTRL_ERR 1
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
index 825ec58bd..f1f53314f 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
@@ -17,6 +17,9 @@
#include "vrdma_dev.h"
#include "vrdma_dev_api.h"
#include "vrdma_ib.h"
+#include "vrdma_abi.h"
+#include "vrdma_mmap.h"
+#include "vrdma_queue.h"
/**
* cmd_str - String representation of virtio RDMA control commands
@@ -61,110 +64,1043 @@ static const char * const cmd_str[] = {
* Return: 0 on success, negative errno on failure.
*/
static int vrdma_exec_verbs_cmd(struct vrdma_dev *vrdev, int verbs_cmd,
- struct scatterlist *verbs_in,
- struct scatterlist *verbs_out)
+ struct scatterlist *verbs_in,
+ struct scatterlist *verbs_out)
{
- struct vrdma_info *vrdma_info = netdev_priv(vrdev->netdev);
- struct virtqueue *vq = vrdev->ctrl_vq;
- struct verbs_ctrl_buf *ctrl_buf;
- struct scatterlist hdr_sg, status_sg;
- struct scatterlist *sgs[4];
- unsigned int out_num = 1, in_num = 1;
- unsigned int len;
- int ret, timeout_loops = VRDMA_COMM_TIMEOUT;
- unsigned long flags;
-
- if (unlikely(!vq)) {
- netdev_err(vrdma_info->dev, "Missing control virtqueue\n");
- return -EINVAL;
- }
-
- ctrl_buf = kmalloc(sizeof(*ctrl_buf), GFP_ATOMIC);
- if (!ctrl_buf) {
- goto unlock;
- }
- ctrl_buf->cmd = verbs_cmd;
- ctrl_buf->status = ~0U;
-
- /* Prepare scatterlists for sending command and receiving status */
- sg_init_one(&hdr_sg, &ctrl_buf->cmd, sizeof(ctrl_buf->cmd));
- sgs[0] = &hdr_sg;
-
- if (verbs_in) {
- sgs[1] = verbs_in;
+ struct vrdma_info *vrdma_info = netdev_priv(vrdev->netdev);
+ struct virtqueue *vq = vrdev->ctrl_vq;
+ struct verbs_ctrl_buf *ctrl_buf;
+ struct scatterlist hdr_sg, status_sg;
+ struct scatterlist *sgs[4];
+ unsigned int out_num = 1, in_num = 1;
+ unsigned int len;
+ int ret, timeout_loops = VRDMA_COMM_TIMEOUT;
+ unsigned long flags;
+
+ if (unlikely(!vq)) {
+ netdev_err(vrdma_info->dev, "Missing control virtqueue\n");
+ return -EINVAL;
+ }
+
+ ctrl_buf = kmalloc(sizeof(*ctrl_buf), GFP_ATOMIC);
+ if (!ctrl_buf)
+ return -ENOMEM;
+ ctrl_buf->cmd = verbs_cmd;
+ ctrl_buf->status = VRDMA_CTRL_ERR;
+
+ /* Prepare scatterlists for sending command and receiving status */
+ sg_init_one(&hdr_sg, &ctrl_buf->cmd, sizeof(ctrl_buf->cmd));
+ sgs[0] = &hdr_sg;
+
+ if (verbs_in) {
+ sgs[1] = verbs_in;
in_num++;
- }
+ }
- sg_init_one(&status_sg, &ctrl_buf->status, sizeof(ctrl_buf->status));
- sgs[in_num] = &status_sg;
+ sg_init_one(&status_sg, &ctrl_buf->status, sizeof(ctrl_buf->status));
+ sgs[in_num] = &status_sg;
- if (verbs_out) {
- sgs[in_num + 1] = verbs_out;
+ if (verbs_out) {
+ sgs[in_num + 1] = verbs_out;
out_num++;
- }
-
- spin_lock_irqsave(&vrdev->ctrl_lock, flags);
-
- ret = virtqueue_add_sgs(vq, sgs, in_num, out_num, vrdev, GFP_ATOMIC);
- if (ret) {
- netdev_err(vrdma_info->dev, "Failed to add cmd %d to CVQ: %d\n",
- verbs_cmd, ret);
- goto unlock;
- }
-
- if (unlikely(!virtqueue_kick(vq))) {
- netdev_err(vrdma_info->dev, "Failed to kick CVQ for cmd %d\n", verbs_cmd);
- ret = -EIO;
- goto unlock;
- }
-
- /* Wait for response: loop with timeout to avoid infinite blocking */
- ret = -ETIMEDOUT;
- while (1) {
- if (virtqueue_get_buf(vq, &len)) {
- ret = 0;
- break;
- }
- if (unlikely(virtqueue_is_broken(vq))) {
- netdev_err(vrdma_info->dev, "CVQ is broken\n");
- ret = -EIO;
- break;
- }
- cpu_relax();
- /*
- * Prevent infinite wait. In non-atomic context, consider using schedule_timeout()
- * for better CPU utilization.
- */
- if (!--timeout_loops) {
- netdev_err(vrdma_info->dev, "Timeout waiting for cmd %d response\n",
- verbs_cmd);
- break;
- }
- }
+ }
+
+ spin_lock_irqsave(&vrdev->ctrl_lock, flags);
+
+ ret = virtqueue_add_sgs(vq, sgs, in_num, out_num, vrdev, GFP_ATOMIC);
+ if (ret) {
+ netdev_err(vrdma_info->dev, "Failed to add cmd %d to CVQ: %d\n",
+ verbs_cmd, ret);
+ goto unlock;
+ }
+
+ if (unlikely(!virtqueue_kick(vq))) {
+ netdev_err(vrdma_info->dev, "Failed to kick CVQ for cmd %d\n", verbs_cmd);
+ ret = -EIO;
+ goto unlock;
+ }
+
+ /* Wait for response: loop with timeout to avoid infinite blocking */
+ ret = -ETIMEDOUT;
+ while (1) {
+ if (virtqueue_get_buf(vq, &len)) {
+ ret = 0;
+ break;
+ }
+ if (unlikely(virtqueue_is_broken(vq))) {
+ netdev_err(vrdma_info->dev, "CVQ is broken\n");
+ ret = -EIO;
+ break;
+ }
+ cpu_relax();
+ /*
+ * Prevent infinite wait. In non-atomic context, consider using schedule_timeout()
+ * for better CPU utilization.
+ */
+ if (!--timeout_loops) {
+ netdev_err(vrdma_info->dev, "Timeout waiting for cmd %d response\n",
+ verbs_cmd);
+ break;
+ }
+ }
unlock:
- spin_unlock_irqrestore(&vrdev->ctrl_lock, flags);
-
- /* Log final result */
- if (ret == 0 && ctrl_buf->status != VRDMA_CTRL_OK) {
- netdev_err(vrdma_info->dev, "EXEC cmd %s failed: status=%d\n",
- cmd_str[verbs_cmd], ctrl_buf->status);
- ret = -EIO; /* Host returned an error status */
- } else if (ret == 0) {
- netdev_dbg(vrdma_info->dev, "EXEC cmd %s OK\n", cmd_str[verbs_cmd]);
- } else {
- netdev_err(vrdma_info->dev, "EXEC cmd %s failed: ret=%d\n",
- cmd_str[verbs_cmd], ret);
- }
-
- kfree(ctrl_buf);
- return ret;
+ spin_unlock_irqrestore(&vrdev->ctrl_lock, flags);
+
+ /* Log final result */
+ if (ret == 0 && ctrl_buf->status != VRDMA_CTRL_OK) {
+ netdev_err(vrdma_info->dev, "EXEC cmd %s failed: status=%d\n",
+ cmd_str[verbs_cmd], ctrl_buf->status);
+ ret = -EIO; /* Host returned an error status */
+ } else if (ret == 0) {
+ netdev_dbg(vrdma_info->dev, "EXEC cmd %s OK\n", cmd_str[verbs_cmd]);
+ } else {
+ netdev_err(vrdma_info->dev, "EXEC cmd %s failed: ret=%d\n",
+ cmd_str[verbs_cmd], ret);
+ }
+
+ kfree(ctrl_buf);
+ return ret;
+}
+
+static int vrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ ret = ib_query_port(ibdev, port_num, &attr);
+ if (ret)
+ return ret;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_VIRTIO;
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
+}
+
+static int vrdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_dev_attr,
+ struct ib_udata *udata)
+{
+ if (udata->inlen || udata->outlen)
+ return -EINVAL;
+
+ *ib_dev_attr = to_vdev(ibdev)->attr;
+ return 0;
}
-static const struct ib_device_ops virtio_rdma_dev_ops = {
+static struct scatterlist *vrdma_init_sg(void *buf, unsigned long nbytes)
+{
+ struct scatterlist *need_sg;
+ int num_page = 0;
+ unsigned long offset;
+ void *ptr;
+
+ if (is_vmalloc_addr(buf)) {
+ int i;
+ unsigned long remaining = nbytes;
+
+ ptr = buf;
+ offset = offset_in_page(ptr);
+ num_page = 1;
+ if (offset + nbytes > PAGE_SIZE) {
+ num_page += (offset + nbytes - PAGE_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
+ }
+
+ need_sg = kmalloc_array(num_page, sizeof(*need_sg), GFP_ATOMIC);
+ if (!need_sg)
+ return NULL;
+
+ sg_init_table(need_sg, num_page);
+
+ for (i = 0; i < num_page; i++) {
+ struct page *page;
+ unsigned int len;
+ unsigned int off_in_page;
+
+ off_in_page = offset_in_page(ptr);
+ len = min((unsigned long)(PAGE_SIZE - off_in_page), remaining);
+
+ page = vmalloc_to_page(ptr);
+ if (!page) {
+ kfree(need_sg);
+ return NULL;
+ }
+
+ sg_set_page(&need_sg[i], page, len, off_in_page);
+
+ ptr += len;
+ remaining -= len;
+ }
+ } else {
+ need_sg = kmalloc(sizeof(*need_sg), GFP_ATOMIC);
+ if (!need_sg)
+ return NULL;
+
+ sg_init_one(need_sg, buf, nbytes);
+ }
+
+ return need_sg;
+}
+
+static int vrdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ struct vrdma_dev *vdev = to_vdev(ibdev);
+ struct vrdma_cmd_query_port *cmd;
+ struct vrdma_port_attr port_attr;
+ struct scatterlist in_sgl, *out_sgl;
+ int ret;
+
+ memset(&port_attr, 0, sizeof(port_attr));
+
+ cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+ if (!cmd)
+ return -ENOMEM;
+
+ out_sgl = vrdma_init_sg(&port_attr, sizeof(port_attr));
+ if (!out_sgl) {
+ kfree(cmd);
+ return -ENOMEM;
+ }
+
+ cmd->port = port;
+ sg_init_one(&in_sgl, cmd, sizeof(*cmd));
+
+ ret = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_QUERY_PORT, &in_sgl, out_sgl);
+ if (!ret) {
+ props->state = port_attr.state;
+ props->max_mtu = port_attr.max_mtu;
+ props->active_mtu = port_attr.active_mtu;
+ props->phys_mtu = port_attr.phys_mtu;
+ props->gid_tbl_len = port_attr.gid_tbl_len;
+ props->port_cap_flags = port_attr.port_cap_flags;
+ props->max_msg_sz = port_attr.max_msg_sz;
+ props->bad_pkey_cntr = port_attr.bad_pkey_cntr;
+ props->qkey_viol_cntr = port_attr.qkey_viol_cntr;
+ props->pkey_tbl_len = port_attr.pkey_tbl_len;
+ props->active_width = port_attr.active_width;
+ props->active_speed = port_attr.active_speed;
+ props->phys_state = port_attr.phys_state;
+
+ props->ip_gids = 1;
+ props->sm_lid = 0;
+ props->lid = 0;
+ props->lmc = 0;
+ props->max_vl_num = 1;
+ props->sm_sl = 0;
+ props->subnet_timeout = 0;
+ props->init_type_reply = 0;
+ props->port_cap_flags2 = 0;
+ }
+
+ kfree(out_sgl);
+ kfree(cmd);
+
+ return ret;
+}
+
+static struct net_device *vrdma_get_netdev(struct ib_device *ibdev,
+ u32 port_num)
+{
+ struct vrdma_dev *vrdev = to_vdev(ibdev);
+ return vrdev->netdev;
+}
+
+/**
+ * vrdma_create_cq - Create a Completion Queue (CQ) for virtio-rdma device
+ * @ibcq: Pointer to the InfiniBand CQ structure to be initialized
+ * @attr: Attributes for CQ initialization, including requested depth (cqe)
+ * @attr_bundle: Bundle containing user context and attributes (includes udata)
+ *
+ * This function creates a Completion Queue (CQ) in the virtio-rdma driver,
+ * which is used to report completion events from asynchronous operations such
+ * as sends, receives, and memory accesses.
+ *
+ * The function performs the following steps:
+ * 1. Enforces per-device CQ count limits.
+ * 2. Allocates a DMA-coherent ring buffer for storing completion entries.
+ * 3. Communicates with the backend via a virtqueue command to create the CQ
+ * and obtain a hardware handle (cqn).
+ * 4. Sets up zero-copy user-space access through mmap() if @udata is provided.
+ * 5. Initializes kernel-side state, including locking and event handling.
+ *
+ * If @attr_bundle->ucore (i.e., udata) is non-NULL:
+ * - A user-mappable region is created that includes:
+ * a) The CQ event ring (array of struct vrdma_cqe)
+ * b) The associated virtqueue's used ring (for polling completions)
+ * - Metadata required for mmap setup (offset, sizes, addresses) is returned
+ * to userspace via ib_copy_to_udata().
+ *
+ * If @udata is NULL (kernel-only CQ):
+ * - The driver pre-posts receive buffers on the CQ's dedicated virtqueue
+ * so that completion messages from the device can be received directly
+ * into the kernel-managed CQ ring.
+ * - No mmap support is enabled.
+ *
+ * Memory Layout Mapped to Userspace:
+ *
+ * +------------------------+ <-- mapped base address
+ * | CQ Event Ring |
+ * | (num_cqe x vrdma_cqe) |
+ * +------------------------+
+ * | Virtqueue Structure |
+ * | - Descriptor Table |
+ * | - Available Ring |
+ * | - Used Ring | <-- accessed at (base + used_off)
+ * +------------------------+
+ *
+ * Usage by Userspace:
+ * After receiving @offset and @cq_size, userspace calls:
+ * mmap(NULL, cq_size, PROT_READ, MAP_SHARED, fd, offset);
+ * Then polls the used ring to detect new completions without syscalls.
+ *
+ * Return:
+ * 0 on success, or negative error code (e.g., -ENOMEM, -EINVAL, -EIO).
+ */
+static int vrdma_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attr_bundle)
+{
+ struct scatterlist in, out;
+ struct vrdma_cq *vcq = to_vcq(ibcq);
+ struct vrdma_dev *vdev = to_vdev(ibcq->device);
+ struct vrdma_cmd_create_cq *cmd;
+ struct vrdma_rsp_create_cq *rsp;
+ struct scatterlist sg;
+ struct ib_udata *udata;
+ int entries = attr->cqe;
+ size_t total_size;
+ struct vrdma_user_mmap_entry *entry = NULL;
+ int ret;
+
+ if (!attr_bundle)
+ udata = NULL;
+ else
+ udata = &attr_bundle->driver_udata;
+
+ /* Enforce maximum number of CQs per device */
+ if (!atomic_add_unless(&vdev->num_cq, 1, vdev->ib_dev.attrs.max_cq)) {
+ dev_dbg(&vdev->vdev->dev, "max CQ limit reached: %u\n",
+ vdev->ib_dev.attrs.max_cq);
+ return -ENOMEM;
+ }
+
+ /* Allocate CQ ring buffer: array of vrdma_cqe entries */
+ total_size = PAGE_ALIGN(entries * sizeof(struct vrdma_cqe));
+ vcq->queue_size = total_size;
+ vcq->queue = dma_alloc_coherent(vdev->vdev->dev.parent, vcq->queue_size,
+ &vcq->dma_addr, GFP_KERNEL);
+ if (!vcq->queue) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ /* Prepare command and response structures */
+ cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd) {
+ ret = -ENOMEM;
+ goto err_free_queue;
+ }
+
+ rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+ if (!rsp) {
+ ret = -ENOMEM;
+ goto err_free_cmd;
+ }
+
+ /* Optional: allocate mmap entry if userspace mapping is requested */
+ if (udata) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto err_free_rsp;
+ }
+ }
+
+ /* Fill command parameters */
+ cmd->cqe = entries;
+ sg_init_one(&in, cmd, sizeof(*cmd));
+ sg_init_one(&out, rsp, sizeof(*rsp));
+
+ /* Send command to backend device */
+ ret = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_CREATE_CQ, &in, &out);
+ if (ret) {
+ dev_err(&vdev->vdev->dev, "CREATE_CQ cmd failed: %d\n", ret);
+ goto err_free_entry;
+ }
+
+ /* Initialize CQ fields from response */
+ vcq->cq_handle = rsp->cqn;
+ vcq->ibcq.cqe = entries;
+ vcq->num_cqe = entries;
+ vcq->vq = &vdev->cq_vqs[rsp->cqn]; /* Assigned virtqueue for this CQ */
+ vdev->cqs[rsp->cqn] = vcq;
+
+ /* Userspace mapping setup */
+ if (udata) {
+ struct vrdma_create_cq_uresp uresp = {};
+ struct vrdma_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct vrdma_ucontext, ibucontext);
+
+ entry->mmap_type = VRDMA_MMAP_CQ;
+ entry->vq = vcq->vq->vq;
+ entry->user_buf = vcq->queue;
+ entry->ubuf_size = vcq->queue_size;
+
+ /* Calculate used ring offset within descriptor table */
+ uresp.used_off = virtqueue_get_used_addr(vcq->vq->vq) -
+ virtqueue_get_desc_addr(vcq->vq->vq);
+
+ /* Align vring size to page boundary for mmap */
+ uresp.vq_size = PAGE_ALIGN(vring_size(virtqueue_get_vring_size(vcq->vq->vq),
+ SMP_CACHE_BYTES));
+ total_size += uresp.vq_size;
+
+ /* Insert mmap entry into user context */
+ ret = rdma_user_mmap_entry_insert(&uctx->ibucontext,
+ &entry->rdma_entry,
+ total_size);
+ if (ret) {
+ dev_err(&vdev->vdev->dev,
+ "Failed to insert mmap entry for CQ: %d\n", ret);
+ goto err_free_entry;
+ }
+
+ /* Populate response to userspace */
+ uresp.offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+ uresp.cq_phys_addr = virt_to_phys(vcq->queue);
+ uresp.num_cqe = entries;
+ uresp.num_cvqe = virtqueue_get_vring_size(vcq->vq->vq);
+ uresp.cq_size = total_size;
+
+ if (udata->outlen < sizeof(uresp)) {
+ ret = -EINVAL;
+ goto err_remove_mmap;
+ }
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret) {
+ dev_err(&vdev->vdev->dev,
+ "Failed to copy CQ creation response to userspace\n");
+ goto err_remove_mmap;
+ }
+
+ vcq->entry = &entry->rdma_entry;
+ } else {
+ int sg_num = min_t(int, entries, vcq->vq->vq->num_free);
+ /* Kernel-only CQ: pre-post receive buffers to catch events */
+ for (int i = 0; i < sg_num; i++) {
+ sg_init_one(&sg, vcq->queue + i, sizeof(struct vrdma_cqe));
+ ret = virtqueue_add_inbuf(vcq->vq->vq, &sg, 1,
+ vcq->queue + i, GFP_KERNEL);
+ if (ret) {
+ dev_err(&vdev->vdev->dev,
+ "Failed to add inbuf to CQ vq: %d\n", ret);
+ /* Best-effort cleanup; continue anyway */
+ }
+ }
+ virtqueue_kick(vcq->vq->vq);
+ }
+
+ /* Final initialization */
+ spin_lock_init(&vcq->lock);
+
+ /* Cleanup temporaries */
+ kfree(rsp);
+ kfree(cmd);
+ return 0;
+
+err_remove_mmap:
+ if (udata && entry)
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+err_free_entry:
+ kfree(entry);
+err_free_rsp:
+ kfree(rsp);
+err_free_cmd:
+ kfree(cmd);
+err_free_queue:
+ dma_free_coherent(vdev->vdev->dev.parent, vcq->queue_size,
+ vcq->queue, vcq->dma_addr);
+err_out:
+ atomic_dec(&vdev->num_cq);
+ return ret;
+}
+
+/**
+ * vrdma_destroy_cq - Destroy a Completion Queue (CQ) in virtio-rdma driver
+ * @cq: Pointer to the IB CQ to destroy
+ * @udata: User data context (may be NULL for kernel clients)
+ *
+ * This function destroys a CQ by:
+ * 1. Disabling callbacks on the associated virtqueue
+ * 2. Sending VIRTIO_RDMA_CMD_DESTROY_CQ command to backend
+ * 3. Draining any pending buffers from the virtqueue (for kernel CQs)
+ * 4. Removing mmap entries (if created for userspace)
+ * 5. Freeing DMA-coherent memory used for CQ ring
+ * 6. Decrementing device-wide CQ counter
+ *
+ * The CQ must not be in use when this function is called.
+ *
+ * Return:
+ * Always returns 0 (success). Future versions may return error if
+ * the device fails to acknowledge destruction.
+ */
+static int vrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+{
+ struct vrdma_cq *vcq = to_vcq(cq);
+ struct vrdma_dev *vdev = to_vdev(cq->device);
+ struct scatterlist in_sgs;
+ struct vrdma_cmd_destroy_cq *cmd;
+ int rc;
+
+ /* Allocate command buffer */
+ cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ /* Prepare and send DESTROY_CQ command to backend */
+ cmd->cqn = vcq->cq_handle;
+ sg_init_one(&in_sgs, cmd, sizeof(*cmd));
+
+ /* Prevent further interrupts/callbacks during teardown */
+ virtqueue_disable_cb(vcq->vq->vq);
+
+ /* Send command synchronously; no response expected on success */
+ rc = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_DESTROY_CQ,
+ &in_sgs, NULL);
+ if (rc) {
+ dev_warn(&vdev->vdev->dev,
+ "Failed to destroy CQ %u: backend error %d\n",
+ vcq->cq_handle, rc);
+ /* Proceed anyway: continue cleanup even if device failed */
+ }
+
+ /*
+ * For kernel-only CQs: drain all unused receive buffers.
+ * Userspace manages its own vring via mmap/poll, so skip.
+ */
+ if (!udata) {
+ struct vrdma_cqe *cqe;
+ while ((cqe = virtqueue_detach_unused_buf(vcq->vq->vq)) != NULL) {
+ /* No action needed - just release buffer back */
+ }
+ }
+
+ /* Remove mmap entry if one was created for userspace access */
+ if (vcq->entry) {
+ rdma_user_mmap_entry_remove(vcq->entry);
+ vcq->entry = NULL; /* Safety: avoid double-remove */
+ }
+
+ /* Unregister CQ from device's CQ table */
+ WRITE_ONCE(vdev->cqs[vcq->cq_handle], NULL);
+
+ /* Free CQ event ring (DMA memory) */
+ dma_free_coherent(vdev->vdev->dev.parent, vcq->queue_size,
+ vcq->queue, vcq->dma_addr);
+
+ /* Decrement global CQ count */
+ atomic_dec(&vdev->num_cq);
+
+ /* Re-enable callback (though vq will likely be reused or freed later) */
+ virtqueue_enable_cb(vcq->vq->vq);
+
+ /* Clean up command structure */
+ kfree(cmd);
+
+ return 0;
+}
+
+static int vrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+
+/**
+ * vrdma_alloc_pd - Allocate a Protection Domain (PD) via virtio-rdma backend
+ * @ibpd: Pointer to the IB PD structure
+ * @udata: User data for communication with userspace (may be NULL)
+ *
+ * This function:
+ * 1. Sends VIRTIO_RDMA_CMD_CREATE_PD to the backend
+ * 2. Receives a PD handle (pdn) from the device
+ * 3. Stores it in the vrdma_pd structure
+ * 4. Optionally returns an empty response to userspace (for ABI compatibility)
+ *
+ * Return:
+ * 0 on success, negative errno on failure.
+ */
+static int vrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct vrdma_pd *pd = to_vpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
+ struct vrdma_dev *vdev = to_vdev(ibdev);
+ struct vrdma_rsp_create_pd *rsp;
+ struct scatterlist out_sgs;
+ int ret;
+
+ /* Allocate response buffer */
+ rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ sg_init_one(&out_sgs, rsp, sizeof(*rsp));
+
+ /* Send command to backend */
+ ret = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_CREATE_PD,
+ NULL, &out_sgs);
+ if (ret) {
+ dev_err(&vdev->vdev->dev,
+ "Failed to create PD: cmd error %d\n", ret);
+ goto err_free;
+ }
+
+ /* Store returned PD handle */
+ pd->pd_handle = rsp->pdn;
+
+ /* If this is a userspace PD, return success indicator */
+ if (udata) {
+ struct vrdma_alloc_pd_uresp uresp = {};
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+ dev_warn(&vdev->vdev->dev,
+ "Failed to copy PD uresp to userspace\n");
+ /* Undo: destroy the PD on backend */
+ vrdma_dealloc_pd(ibpd, udata);
+ ret = -EFAULT;
+ goto err_free;
+ }
+ }
+
+ dev_info(&vdev->vdev->dev, "%s: allocated PD %u\n",
+ __func__, pd->pd_handle);
+
+err_free:
+ kfree(rsp);
+ return ret;
+}
+
+/**
+ * vrdma_dealloc_pd - Deallocate a Protection Domain (PD)
+ * @ibpd: Pointer to the IB PD to destroy
+ * @udata: User data context (ignored here; used for symmetry)
+ *
+ * This function sends VIRTIO_RDMA_CMD_DESTROY_PD to the backend
+ * to release the PD resource. No response is expected.
+ *
+ * Note: There is no local state (e.g., DMA mappings) tied to PD in this driver.
+ * All cleanup is handled by the backend.
+ *
+ * Return:
+ * Always returns 0 (success).
+ */
+static int vrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct vrdma_pd *pd = to_vpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
+ struct vrdma_dev *vdev = to_vdev(ibdev);
+ struct vrdma_cmd_destroy_pd *cmd;
+ struct scatterlist in_sgs;
+ int ret;
+
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->pdn = pd->pd_handle;
+ sg_init_one(&in_sgs, cmd, sizeof(*cmd));
+
+ ret = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_DESTROY_PD,
+ &in_sgs, NULL);
+ if (ret) {
+ dev_err(&vdev->vdev->dev,
+ "Failed to destroy PD %u: backend error %d\n",
+ pd->pd_handle, ret);
+ /* Proceed anyway - don't block cleanup */
+ }
+
+ dev_info(&vdev->vdev->dev, "%s: deallocated PD %u\n",
+ __func__, pd->pd_handle);
+
+ kfree(cmd);
+ return 0;
+}
+
+/**
+ * vrdma_init_mmap_entry - Initialize and insert a user mmap entry for QP buffer
+ * @vdev: Pointer to the vRDMA device
+ * @vq: Virtqueue associated with this memory region
+ * @entry_: Pointer to store allocated mmap entry (output)
+ * @buf_size: Size of the user data buffer (e.g., SQ/RQ ring space)
+ * @vctx: User context to which the mmap entry will be attached
+ * @size: Total size of the allocated mapping region (output)
+ * @used_off: Offset within the mapping where used ring starts (output)
+ * @vq_size: Aligned size of the virtqueue structure (output)
+ * @dma_addr: DMA address of the allocated coherent buffer (output)
+ *
+ * This function allocates a physically contiguous, DMA-coherent buffer for
+ * the Send/Receive Queue data (e.g., WQE payloads), maps the virtqueue's
+ * descriptor and used rings into userspace via an mmap entry, and inserts it
+ * into the user context. It supports fast doorbell mapping if enabled.
+ *
+ * The layout in userspace is:
+ * [0, buf_size_aligned) : Data buffer (SQ/RQ payload space)
+ * [buf_size_aligned, ...) : Virtqueue (desc + avail + used)
+ * [... + vq_size, ...] : Optional fast doorbell page
+ *
+ * Returns a pointer to the kernel virtual address of the data buffer,
+ * or NULL on failure.
+ */
+static void *vrdma_init_mmap_entry(struct vrdma_dev *vdev,
+ struct virtqueue *vq,
+ struct vrdma_user_mmap_entry **entry_,
+ int buf_size,
+ struct vrdma_ucontext *vctx,
+ __u64 *size,
+ __u64 *used_off,
+ __u32 *vq_size,
+ dma_addr_t *dma_addr)
+{
+ void *buf;
+ size_t total_size;
+ struct vrdma_user_mmap_entry *entry;
+ int rc;
+
+ /* Allocate aligned buffer for SQ/RQ payload area */
+ total_size = PAGE_ALIGN(buf_size);
+ buf = dma_alloc_coherent(vdev->vdev->dev.parent, total_size,
+ dma_addr, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ dma_free_coherent(vdev->vdev->dev.parent, total_size,
+ buf, *dma_addr);
+ return NULL;
+ }
+
+ entry->mmap_type = VRDMA_MMAP_QP;
+ entry->vq = vq;
+ entry->user_buf = buf;
+ entry->ubuf_size = total_size; /* Already page-aligned */
+
+ /* Calculate offset from desc to used ring (for userspace polling) */
+ *used_off = virtqueue_get_used_addr(vq) - virtqueue_get_desc_addr(vq);
+
+ /* Align vring size to cache line boundary and round up to page size */
+ *vq_size = vring_size(virtqueue_get_vring_size(vq), SMP_CACHE_BYTES);
+ *vq_size = PAGE_ALIGN(*vq_size);
+ total_size += *vq_size;
+
+ /* Add extra page for fast doorbell if supported */
+ if (vdev->fast_doorbell)
+ total_size += PAGE_SIZE;
+
+ /* Insert into user mmap infrastructure */
+ rc = rdma_user_mmap_entry_insert(&vctx->ibucontext, &entry->rdma_entry,
+ total_size);
+ if (rc) {
+ dma_free_coherent(vdev->vdev->dev.parent, total_size,
+ buf, *dma_addr);
+ kfree(entry);
+ return NULL;
+ }
+
+ *size = total_size;
+ *entry_ = entry;
+ return buf;
+}
+
+/**
+ * vrdma_create_qp - Create a Virtio-RDMA Queue Pair (QP)
+ * @ibqp: Pointer to the IB QP structure (allocated by core)
+ * @attr: QP initialization attributes from userspace or kernel
+ * @udata: User data for mmap and doorbell mapping (NULL if kernel QP)
+ *
+ * This function creates a QP in the backend vRDMA device via a virtqueue
+ * command. It allocates resources including:
+ * - Send and Receive Queues (virtqueues)
+ * - Memory regions for WQE buffers (if user-space QP)
+ * - DMA-coherent rings and mmap entries
+ *
+ * On success, it returns 0 and fills @udata with offsets and sizes needed
+ * for userspace to mmap SQ/RQ rings and access CQ notification mechanisms.
+ *
+ * Context: Called in process context. May sleep.
+ * Return: 0 on success, negative errno on failure.
+ */
+static int vrdma_create_qp(struct ib_qp *ibqp,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct scatterlist in_sgs, out_sgs;
+ struct vrdma_dev *vdev = to_vdev(ibqp->device);
+ struct vrdma_cmd_create_qp *cmd;
+ struct vrdma_rsp_create_qp *rsp;
+ struct vrdma_qp *vqp = to_vqp(ibqp);
+ int rc, vqn;
+ int ret = 0;
+
+ /* SRQ is not supported yet */
+ if (attr->srq) {
+ dev_err(&vdev->vdev->dev, "SRQ is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Enforce QP count limit */
+ if (!atomic_add_unless(&vdev->num_qp, 1, vdev->ib_dev.attrs.max_qp)) {
+ dev_dbg(&vdev->vdev->dev, "exceeded max_qp (%u)\n",
+ vdev->ib_dev.attrs.max_qp);
+ return -ENOMEM;
+ }
+
+ /* Validate QP attributes before sending to device */
+ if (vrdma_qp_check_init(vdev, attr)) {
+ dev_dbg(&vdev->vdev->dev, "invalid QP init attributes\n");
+ ret = -EINVAL;
+ goto err_alloc_cmd;
+ }
+
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd) {
+ ret = -ENOMEM;
+ goto err_alloc_cmd;
+ }
+
+ rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+ if (!rsp) {
+ ret = -ENOMEM;
+ goto err_alloc_rsp;
+ }
+
+ /* Prepare command for device */
+ cmd->pdn = to_vpd(ibqp->pd)->pd_handle;
+ cmd->qp_type = attr->qp_type;
+ cmd->sq_sig_type = attr->sq_sig_type;
+ cmd->max_send_wr = attr->cap.max_send_wr;
+ cmd->max_send_sge = attr->cap.max_send_sge;
+ cmd->send_cqn = to_vcq(attr->send_cq)->cq_handle;
+ cmd->max_recv_wr = attr->cap.max_recv_wr;
+ cmd->max_recv_sge = attr->cap.max_recv_sge;
+ cmd->recv_cqn = to_vcq(attr->recv_cq)->cq_handle;
+ cmd->max_inline_data = attr->cap.max_inline_data;
+
+ sg_init_one(&in_sgs, cmd, sizeof(*cmd));
+ sg_init_one(&out_sgs, rsp, sizeof(*rsp));
+
+ /* Execute CREATE_QP verb over control virtqueue */
+ rc = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_CREATE_QP,
+ &in_sgs, &out_sgs);
+ if (rc) {
+ dev_err(&vdev->vdev->dev, "CREATE_QP cmd failed: %d\n", rc);
+ ret = -EIO;
+ goto err_exec_cmd;
+ }
+
+ /* Initialize software QP state */
+ vqp->type = udata ? VIRTIO_RDMA_TYPE_USER : VIRTIO_RDMA_TYPE_KERNEL;
+ vqp->port = attr->port_num;
+ vqp->qp_handle = rsp->qpn;
+ ibqp->qp_num = rsp->qpn;
+
+ vqn = rsp->qpn;
+ vqp->sq = &vdev->qp_vqs[vqn * 2];
+ vqp->rq = &vdev->qp_vqs[vqn * 2 + 1];
+
+ /* If this is a user-space QP, set up mmap-able memory regions */
+ if (udata) {
+ struct vrdma_create_qp_uresp uresp = {};
+ struct vrdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct vrdma_ucontext, ibucontext);
+ uint32_t per_wqe_size;
+
+ /* Allocate SQ buffer area */
+ per_wqe_size = sizeof(struct vrdma_cmd_post_send) +
+ sizeof(struct vrdma_sge) * attr->cap.max_send_sge;
+ vqp->usq_buf_size = PAGE_ALIGN(per_wqe_size * attr->cap.max_send_wr);
+
+ vqp->usq_buf = vrdma_init_mmap_entry(vdev, vqp->sq->vq,
+ &vqp->sq_entry,
+ vqp->usq_buf_size,
+ uctx,
+ &uresp.sq_mmap_size,
+ &uresp.svq_used_idx_off,
+ &uresp.svq_ring_size,
+ &vqp->usq_dma_addr);
+ if (!vqp->usq_buf) {
+ dev_err(&vdev->vdev->dev, "failed to init SQ mmap entry\n");
+ ret = -ENOMEM;
+ goto err_mmap_sq;
+ }
+
+ /* Allocate RQ buffer area */
+ per_wqe_size = sizeof(struct vrdma_cmd_post_send) +
+ sizeof(struct vrdma_sge) * attr->cap.max_recv_sge;
+ vqp->urq_buf_size = PAGE_ALIGN(per_wqe_size * attr->cap.max_recv_wr);
+
+ vqp->urq_buf = vrdma_init_mmap_entry(vdev, vqp->rq->vq,
+ &vqp->rq_entry,
+ vqp->urq_buf_size,
+ uctx,
+ &uresp.rq_mmap_size,
+ &uresp.rvq_used_idx_off,
+ &uresp.rvq_ring_size,
+ &vqp->urq_dma_addr);
+ if (!vqp->urq_buf) {
+ dev_err(&vdev->vdev->dev, "failed to init RQ mmap entry\n");
+ ret = -ENOMEM;
+ goto err_mmap_rq;
+ }
+
+ /* Fill response for userspace */
+ uresp.sq_mmap_offset = rdma_user_mmap_get_offset(&vqp->sq_entry->rdma_entry);
+ uresp.sq_db_addr = vqp->usq_dma_addr;
+ uresp.num_sq_wqes = attr->cap.max_send_wr;
+ uresp.num_svqe = virtqueue_get_vring_size(vqp->sq->vq);
+ uresp.sq_head_idx = vqp->sq->vq->index;
+
+ uresp.rq_mmap_offset = rdma_user_mmap_get_offset(&vqp->rq_entry->rdma_entry);
+ uresp.rq_db_addr = vqp->urq_dma_addr;
+ uresp.num_rq_wqes = attr->cap.max_recv_wr;
+ uresp.num_rvqe = virtqueue_get_vring_size(vqp->rq->vq);
+ uresp.rq_head_idx = vqp->rq->vq->index;
+
+ uresp.notifier_size = vdev->fast_doorbell ? PAGE_SIZE : 0;
+ uresp.qp_handle = vqp->qp_handle;
+
+ if (udata->outlen < sizeof(uresp)) {
+ dev_dbg(&vdev->vdev->dev, "user outlen too small: %zu < %zu\n",
+ udata->outlen, sizeof(uresp));
+ ret = -EINVAL;
+ goto err_copy_udata;
+ }
+
+ rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rc) {
+ dev_err(&vdev->vdev->dev, "failed to copy udata to userspace\n");
+ ret = rc;
+ goto err_copy_udata;
+ }
+ }
+
+ /* Cleanup and return success */
+ kfree(cmd);
+ kfree(rsp);
+ return 0;
+
+err_copy_udata:
+ dma_free_coherent(vdev->vdev->dev.parent, vqp->urq_buf_size,
+ vqp->urq_buf, vqp->urq_dma_addr);
+ rdma_user_mmap_entry_remove(&vqp->rq_entry->rdma_entry);
+
+err_mmap_rq:
+ dma_free_coherent(vdev->vdev->dev.parent, vqp->usq_buf_size,
+ vqp->usq_buf, vqp->usq_dma_addr);
+ rdma_user_mmap_entry_remove(&vqp->sq_entry->rdma_entry);
+
+err_mmap_sq:
+ /* vqp is embedded in ibqp and owned by the IB core; nothing to free here */
+
+err_exec_cmd:
+ kfree(rsp);
+
+err_alloc_rsp:
+ kfree(cmd);
+
+err_alloc_cmd:
+ atomic_dec(&vdev->num_qp);
+ return ret;
+}
+
+/**
+ * vrdma_destroy_qp - Destroy a Virtio-RDMA Queue Pair (QP)
+ * @ibqp: Pointer to the IB QP to destroy
+ * @udata: User data context (may be NULL for kernel QPs)
+ *
+ * This function destroys a QP both in the host driver and on the backend
+ * vRDMA device. It performs the following steps:
+ * 1. Sends a VIRTIO_RDMA_CMD_DESTROY_QP command to the device.
+ * 2. Frees DMA-coherent memory used for user-space WQE buffers.
+ * 3. Removes mmap entries for SQ/RQ rings.
+ * 4. Decrements global QP count.
+ *
+ * Context: Called in process context. May sleep.
+ * Return:
+ * * 0 on success
+ * * Negative errno on failure (e.g., communication error with device)
+ */
+static int vrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct vrdma_dev *vdev = to_vdev(ibqp->device);
+ struct vrdma_qp *vqp = to_vqp(ibqp);
+ struct vrdma_cmd_destroy_qp *cmd;
+ struct scatterlist in;
+ int rc;
+
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->qpn = vqp->qp_handle;
+ sg_init_one(&in, cmd, sizeof(*cmd));
+
+ /* Send DESTROY_QP command to the backend device */
+ rc = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_DESTROY_QP, &in, NULL);
+ if (rc) {
+ dev_err(&vdev->vdev->dev, "DESTROY_QP failed for qpn=%u: %d\n",
+ vqp->qp_handle, rc);
+ /*
+ * Even if the device command fails, we still proceed to free
+ * local resources because the QP is being destroyed from the
+ * software side regardless.
+ */
+ }
+
+ /* Clean up user-space mappings if this is a user QP */
+ if (udata) {
+ /* Free Send Queue buffer */
+ if (vqp->usq_buf) {
+ dma_free_coherent(vdev->vdev->dev.parent,
+ vqp->usq_buf_size,
+ vqp->usq_buf,
+ vqp->usq_dma_addr);
+ rdma_user_mmap_entry_remove(&vqp->sq_entry->rdma_entry);
+ }
+
+ /* Free Receive Queue buffer */
+ if (vqp->urq_buf) {
+ dma_free_coherent(vdev->vdev->dev.parent,
+ vqp->urq_buf_size,
+ vqp->urq_buf,
+ vqp->urq_dma_addr);
+ rdma_user_mmap_entry_remove(&vqp->rq_entry->rdma_entry);
+ }
+ }
+
+ /* Decrement global QP counter */
+ atomic_dec(&vdev->num_qp);
+
+ kfree(cmd);
+ return rc;
+}
+
+static const struct ib_device_ops vrdma_dev_ops = {
.owner = THIS_MODULE,
.uverbs_abi_ver = VIRTIO_RDMA_ABI_VERSION,
.driver_id = RDMA_DRIVER_VIRTIO,
+
+ .get_port_immutable = vrdma_port_immutable,
+ .query_device = vrdma_query_device,
+ .query_port = vrdma_query_port,
+ .get_netdev = vrdma_get_netdev,
+ .create_cq = vrdma_create_cq,
+ .destroy_cq = vrdma_destroy_cq,
+ .alloc_pd = vrdma_alloc_pd,
+ .dealloc_pd = vrdma_dealloc_pd,
+ .create_qp = vrdma_create_qp,
+ .destroy_qp = vrdma_destroy_qp,
};
/**
@@ -195,41 +1131,32 @@ int vrdma_register_ib_device(struct vrdma_dev *vrdev)
ibdev->node_type = RDMA_NODE_IB_CA;
strncpy(ibdev->node_desc, "VirtIO RDMA", sizeof(ibdev->node_desc));
- ibdev->phys_port_cnt = 1; /* Assume single port */
- ibdev->num_comp_vectors = 1; /* One completion vector */
+ ibdev->phys_port_cnt = 1; /* Assume single port */
+ ibdev->num_comp_vectors = 1; /* One completion vector */
- /* Set GUID: Use MAC-like identifier derived from device info (example) */
- memcpy(&ibdev->node_guid, vrdev->vdev->id.device, 6);
- *(u64 *)&ibdev->node_guid |= 0x020000 << 24; /* Make locally administered */
-
- /* --- Step 2: Set user verbs command mask --- */
-
- ibdev->uverbs_cmd_mask =
- BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
- BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
- BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
- BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
- BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
- BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
- BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
- BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
- BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
- BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
- BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) |
- BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
- BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
- BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) |
- BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
- BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
- BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
- BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) |
- BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) |
- BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH);
+ ibdev->uverbs_cmd_mask =
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
+ BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH);
/* --- Step 3: Attach device operation vectors --- */
- ib_set_device_ops(ibdev, &virtio_rdma_dev_ops);
+ ib_set_device_ops(ibdev, &vrdma_dev_ops);
/* --- Step 4: Bind to netdev (optional, for RoCE) --- */
if (vrdev->netdev) {
@@ -244,6 +1171,9 @@ int vrdma_register_ib_device(struct vrdma_dev *vrdev)
return rc;
}
+ /* Set GUID: Use MAC-like identifier derived from device info (example) */
+ memcpy(&ibdev->node_guid, ibdev->name, 6);
+
pr_info("Successfully registered vRDMA device as '%s'\n", dev_name(&ibdev->dev));
return 0;
}
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
index bdba5a9de..ba88599c8 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
@@ -13,6 +13,12 @@
#define VRDMA_COMM_TIMEOUT 1000000
+enum vrdma_type {
+ VIRTIO_RDMA_TYPE_USER,
+ VIRTIO_RDMA_TYPE_KERNEL
+};
+
enum {
VIRTIO_RDMA_ATOMIC_NONE,
VIRTIO_RDMA_ATOMIC_HCA,
@@ -71,12 +77,110 @@ struct vrdma_cq {
struct vrdma_vq *vq; /* Virtqueue where CQEs arrive */
struct rdma_user_mmap_entry *entry; /* For mmap support in userspace */
spinlock_t lock;
- struct virtio_rdma_cqe *queue; /* CQE ring buffer */
+ struct vrdma_cqe *queue; /* CQE ring buffer */
size_t queue_size;
dma_addr_t dma_addr;
u32 num_cqe;
};
+struct vrdma_pd {
+ struct ib_pd ibpd;
+ u32 pd_handle;
+};
+
+/**
+ * struct vrdma_port_attr - Virtual RDMA port attributes
+ * @state: Port physical state (e.g., IB_PORT_DOWN, IB_PORT_ACTIVE).
+ * @max_mtu: Maximum MTU supported by the port.
+ * @active_mtu: Currently active MTU.
+ * @phys_mtu: Physical layer MTU (typically same as active_mtu in virtual devices).
+ * @gid_tbl_len: Size of the GID table (number of supported GIDs).
+ * @port_cap_flags: Port capabilities (e.g., IB_PORT_CM_SUP, IB_PORT_IP_BASED_GIDS).
+ * @max_msg_sz: Maximum message size supported.
+ * @bad_pkey_cntr: P_Key violation counter (optional in virtual devices).
+ * @qkey_viol_cntr: QKey violation counter.
+ * @pkey_tbl_len: Number of entries in the P_Key table.
+ * @active_width: Current active width (e.g., IB_WIDTH_4X, IB_WIDTH_1X).
+ * @active_speed: Current active speed (e.g., IB_SPEED_10_GBPS, IB_SPEED_25_GBPS).
+ * @phys_state: Physical port state (vendor-specific, optional extension).
+ * @reserved: Reserved for future use or alignment padding; must be zeroed.
+ *
+ * This structure mirrors `struct ib_port_attr` from <rdma/ib_verbs.h> and is used
+ * to query port properties via `vrdma_query_port()` operation.
+ */
+struct vrdma_port_attr {
+ enum ib_port_state state;
+ enum ib_mtu max_mtu;
+ enum ib_mtu active_mtu;
+ u32 phys_mtu;
+ int gid_tbl_len;
+ u32 port_cap_flags;
+ u32 max_msg_sz;
+ u32 bad_pkey_cntr;
+ u32 qkey_viol_cntr;
+ u16 pkey_tbl_len;
+ u16 active_speed;
+ u8 active_width;
+ u8 phys_state;
+ u32 reserved[32]; /* For future extensions */
+} __packed;
+
+struct vrdma_ucontext {
+ struct ib_ucontext ibucontext;
+ struct vrdma_dev *dev;
+};
+
+/**
+ * struct vrdma_qp - Virtual RDMA Queue Pair (QP) private data
+ *
+ * This structure holds all driver-private state for a QP in the virtio-rdma driver.
+ * It is allocated during ib_create_qp() and freed on ib_destroy_qp().
+ */
+struct vrdma_qp {
+ /* Public IB layer object must be first */
+ struct ib_qp ibqp;
+
+ /* QP type (IB_QPT_RC, IB_QPT_UD, etc.) */
+ u8 type;
+
+ /* Port number this QP is bound to (usually 1 for single-port devices) */
+ u8 port;
+
+ /* Handle used by backend to identify this QP */
+ u32 qp_handle;
+
+ /* Send Queue (SQ) resources */
+ struct vrdma_vq *sq; /* Virtqueue for SQ ops */
+ void *usq_buf; /* Kernel-mapped send queue ring */
+ size_t usq_buf_size; /* Size of SQ ring buffer */
+ dma_addr_t usq_dma_addr; /* DMA address for coherent mapping */
+
+ /* Receive Queue (RQ) resources */
+ struct vrdma_vq *rq; /* Virtqueue for RQ ops */
+ void *urq_buf; /* Kernel-mapped receive queue ring */
+ size_t urq_buf_size; /* Size of RQ ring buffer */
+ dma_addr_t urq_dma_addr; /* DMA address for coherent mapping */
+
+ /* User-space mmap entries for userspace QP access */
+ struct vrdma_user_mmap_entry *sq_entry; /* Mmap entry for SQ buffer */
+ struct vrdma_user_mmap_entry *rq_entry; /* Mmap entry for RQ buffer */
+};
+
+static inline struct vrdma_cq *to_vcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct vrdma_cq, ibcq);
+}
+
+static inline struct vrdma_pd *to_vpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct vrdma_pd, ibpd);
+}
+
+static inline struct vrdma_qp *to_vqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct vrdma_qp, ibqp);
+}
+
int vrdma_register_ib_device(struct vrdma_dev *vrdev);
void vrdma_unregister_ib_device(struct vrdma_dev *vrdev);
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c
index ea2f15491..9113fa3a3 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_main.c
@@ -29,39 +29,39 @@
*/
static int vrdma_probe(struct virtio_device *vdev)
{
- struct vrdma_dev *vrdev;
- int rc;
-
- /* Step 1: Allocate IB device structure using ib_core's allocator */
- vrdev = ib_alloc_device(vrdma_dev, ib_dev);
- if (!vrdev) {
- pr_err("Failed to allocate vRDMA device\n");
- return -ENOMEM;
- }
-
- /* Initialize basic fields */
- vrdev->vdev = vdev;
- vdev->priv = vrdev;
-
- spin_lock_init(&vrdev->ctrl_lock);
- spin_lock_init(&vrdev->pending_mmaps_lock);
- INIT_LIST_HEAD(&vrdev->pending_mmaps);
-
- /* Step 2: Check doorbell mechanism support */
- if (to_vp_device(vdev)->mdev.notify_offset_multiplier != PAGE_SIZE) {
- pr_warn("notify_offset_multiplier=%u != PAGE_SIZE, disabling fast doorbell\n",
- to_vp_device(vdev)->mdev.notify_offset_multiplier);
- vrdev->fast_doorbell = false;
- } else {
- vrdev->fast_doorbell = true;
- }
-
- /* Step 3: Initialize hardware interface (virtqueues) */
- rc = vrdma_init_device(vrdev);
- if (rc) {
- pr_err("Failed to initialize vRDMA device queues\n");
- goto err_dealloc_device;
- }
+ struct vrdma_dev *vrdev;
+ int rc;
+
+ /* Step 1: Allocate IB device structure using ib_core's allocator */
+ vrdev = ib_alloc_device(vrdma_dev, ib_dev);
+ if (!vrdev) {
+ pr_err("Failed to allocate vRDMA device\n");
+ return -ENOMEM;
+ }
+
+ /* Initialize basic fields */
+ vrdev->vdev = vdev;
+ vdev->priv = vrdev;
+
+ spin_lock_init(&vrdev->ctrl_lock);
+ spin_lock_init(&vrdev->pending_mmaps_lock);
+ INIT_LIST_HEAD(&vrdev->pending_mmaps);
+
+ /* Step 2: Check doorbell mechanism support */
+ if (to_vp_device(vdev)->mdev.notify_offset_multiplier != PAGE_SIZE) {
+ pr_warn("notify_offset_multiplier=%u != PAGE_SIZE, disabling fast doorbell\n",
+ to_vp_device(vdev)->mdev.notify_offset_multiplier);
+ vrdev->fast_doorbell = false;
+ } else {
+ vrdev->fast_doorbell = true;
+ }
+
+ /* Step 3: Initialize hardware interface (virtqueues) */
+ rc = vrdma_init_device(vrdev);
+ if (rc) {
+ pr_err("Failed to initialize vRDMA device queues\n");
+ goto err_dealloc_device;
+ }
rc = vrdma_init_netdev(vrdev);
if (rc) {
@@ -69,26 +69,26 @@ static int vrdma_probe(struct virtio_device *vdev)
goto err_cleanup_device;
}
- /* Step 4: Register with InfiniBand core layer */
- rc = vrdma_register_ib_device(vrdev);
- if (rc) {
- pr_err("Failed to register with IB subsystem\n");
- goto err_cleanup_netdev;
- }
+ /* Step 4: Register with InfiniBand core layer */
+ rc = vrdma_register_ib_device(vrdev);
+ if (rc) {
+ pr_err("Failed to register with IB subsystem\n");
+ goto err_cleanup_netdev;
+ }
- return 0;
+ return 0;
err_cleanup_netdev:
vrdma_finish_netdev(vrdev);
err_cleanup_device:
- vrdma_finish_device(vrdev); /* Safe cleanup of queues and reset */
+ vrdma_finish_device(vrdev); /* Safe cleanup of queues and reset */
err_dealloc_device:
- ib_dealloc_device(&vrdev->ib_dev); /* Frees vrdev itself */
+ ib_dealloc_device(&vrdev->ib_dev); /* Frees vrdev itself */
vdev->priv = NULL;
- return rc;
+ return rc;
}
static void vrdma_remove(struct virtio_device *vdev)
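
The body of vrdma_remove() falls outside this hunk; the following is only a
sketch of the expected teardown, under the assumption that removal simply
unwinds the probe steps above in reverse order:

	/* Sketch only (assumption): reverse the probe sequence. */
	static void vrdma_remove_sketch(struct virtio_device *vdev)
	{
		struct vrdma_dev *vrdev = vdev->priv;

		if (!vrdev)
			return;

		vrdma_unregister_ib_device(vrdev);  /* undo Step 4 */
		vrdma_finish_netdev(vrdev);         /* drop netdev reference */
		vrdma_finish_device(vrdev);         /* reset and free queues */
		ib_dealloc_device(&vrdev->ib_dev);  /* frees vrdev itself */
		vdev->priv = NULL;
	}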
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_mmap.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_mmap.h
new file mode 100644
index 000000000..acad4626c
--- /dev/null
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_mmap.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Xiong Weimin <xiongweimin@...inos.cn> */
+/* Copyright (c) 2020 kylinos.cn. All rights reserved. */
+#ifndef __VRDMA_MMAP_H__
+#define __VRDMA_MMAP_H__
+
+#include <linux/types.h>
+#include <linux/kref.h>
+
+/* Mmap type definitions for virtio-rdma */
+#define VRDMA_MMAP_CQ 1 /* Mapping for Completion Queue (CQ) */
+#define VRDMA_MMAP_QP 2 /* Mapping for Queue Pair (QP) */
+
+/**
+ * struct vrdma_user_mmap_entry - Private extension of RDMA user mmap entry
+ *
+ * This structure extends the generic 'struct rdma_user_mmap_entry' to carry
+ * driver-specific data for virtio-rdma. It is used when registering mmap
+ * regions that allow userspace to directly access hardware rings or buffers
+ * via memory mapping (e.g., CQ or QP context rings).
+ *
+ * @rdma_entry: The base RDMA core mmap entry; must be the first member
+ * to ensure proper container_of() resolution and compatibility
+ * with RDMA subsystem APIs.
+ * @mmap_type: Specifies the type of mapped resource (VRDMA_MMAP_CQ or
+ *             VRDMA_MMAP_QP). Used in fault handling to determine behavior.
+ * @vq: Pointer to the associated virtqueue. This allows the driver to link
+ * the mmap region with a specific virtual queue for event processing
+ * or doorbell handling.
+ * @user_buf: Virtual address of the user-space buffer (optional). Can be used
+ * to map kernel-managed ring buffers into user space, allowing direct
+ * access without system calls.
+ * @ubuf_size: Size of the user buffer in bytes. Used for bounds checking
+ * during mapping and fault operations.
+ */
+struct vrdma_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u8 mmap_type; /* Type of mmap region (CQ/QP) */
+ struct virtqueue *vq; /* Associated virtqueue pointer */
+ void *user_buf; /* User buffer virtual address */
+ u64 ubuf_size; /* Size of the mapped user buffer */
+};
+
+static inline struct vrdma_user_mmap_entry *
+to_ventry(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct vrdma_user_mmap_entry,
+ rdma_entry);
+}
+
+/**
+ * vrdma_mmap - Handle userspace mmap request for virtio-rdma resources
+ * @context: User context from IB layer
+ * @vma: Virtual memory area to be mapped
+ *
+ * This callback is invoked when userspace calls mmap() on a special offset
+ * returned by an ioctl (e.g., during CQ or QP creation). It maps device-specific
+ * memory regions (like completion queues or queue pair rings) into user space
+ * for zero-copy access.
+ *
+ * The VMA's pgoff field contains the mmap offset registered via
+ * rdma_user_mmap_entry_insert(). This function looks up the corresponding
+ * mmap entry and sets up the appropriate vm_ops for page fault handling.
+ *
+ * Return:
+ * - 0 on success
+ * - Negative error code (e.g., -EINVAL, -EAGAIN) on failure
+ */
+int vrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+/**
+ * vrdma_mmap_free - Free mmap entry when userspace unmaps or closes
+ * @rdma_entry: The mmap entry being released
+ *
+ * This callback is registered with the RDMA core to free private mmap entries
+ * when the user process unmaps the region or exits. It is responsible for
+ * releasing any resources associated with the mapping (e.g., freeing metadata).
+ *
+ * The function should use 'to_ventry()' to retrieve the private structure,
+ * then kfree() it. Note: The actual mapped memory (e.g., ring buffer) may be
+ * freed separately depending on lifecycle management.
+ */
+void vrdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+
+#endif
\ No newline at end of file
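
A minimal sketch of what the two callbacks declared above could look like,
assuming the CQ/QP rings registered in @user_buf are vmalloc-backed and can be
mapped with remap_vmalloc_range(); everything beyond the RDMA core APIs is
illustrative only:

	#include <linux/vmalloc.h>
	#include <rdma/ib_verbs.h>
	#include "vrdma_mmap.h"

	/* Sketch only: look up the entry registered at create time and map
	 * the ring into the caller's address space (assumes a vmalloc()ed
	 * user_buf).
	 */
	int vrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
	{
		struct rdma_user_mmap_entry *rdma_entry;
		struct vrdma_user_mmap_entry *entry;
		int rc;

		rdma_entry = rdma_user_mmap_entry_get(context, vma);
		if (!rdma_entry)
			return -EINVAL;
		entry = to_ventry(rdma_entry);

		if (vma->vm_end - vma->vm_start > entry->ubuf_size) {
			rc = -EINVAL;
			goto out;
		}

		rc = remap_vmalloc_range(vma, entry->user_buf, 0);
	out:
		rdma_user_mmap_entry_put(rdma_entry);
		return rc;
	}

	void vrdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
	{
		/* The ring buffer itself is freed with the CQ/QP; only the
		 * metadata wrapper is released here.
		 */
		kfree(to_ventry(rdma_entry));
	}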
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c
index e83902e6d..19dd9af18 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_netdev.c
@@ -16,67 +16,67 @@
* @vrdev: The vRDMA device
*
* WARNING: This is a non-standard hack for development/emulation environments.
- * Do not use in production or upstream drivers.
+ * Do not use in production or upstream drivers.
*
* Returns 0 on success, or negative errno.
*/
int vrdma_init_netdev(struct vrdma_dev *vrdev)
{
- struct pci_dev *pdev_net;
- struct virtio_pci_device *vp_dev;
- struct virtio_pci_device *vnet_pdev;
- void *priv;
- struct net_device *netdev;
-
- if (!vrdev || !vrdev->vdev) {
- pr_err("%s: invalid vrdev or vdev\n", __func__);
- return -EINVAL;
- }
-
- vp_dev = to_vp_device(vrdev->vdev);
-
- /* Find the PCI device at function 0 of the same slot */
- pdev_net = pci_get_slot(vp_dev->pci_dev->bus,
- PCI_DEVFN(PCI_SLOT(vp_dev->pci_dev->devfn), 0));
- if (!pdev_net) {
- pr_err("Failed to find PCI device at fn=0 of slot %x\n",
- PCI_SLOT(vp_dev->pci_dev->devfn));
- return -ENODEV;
- }
-
- /* Optional: Validate it's a known virtio-net device */
- if (pdev_net->vendor != PCI_VENDOR_ID_REDHAT_QUMRANET ||
- pdev_net->device != 0x1041) {
- pr_warn("PCI device %04x:%04x is not expected virtio-net (1041) device\n",
- pdev_net->vendor, pdev_net->device);
- pci_dev_put(pdev_net);
- return -ENODEV;
- }
-
- /* Get the virtio_pci_device from drvdata */
- vnet_pdev = pci_get_drvdata(pdev_net);
- if (!vnet_pdev || !vnet_pdev->vdev.priv) {
- pr_err("No driver data or priv for virtio-net device\n");
- pci_dev_put(pdev_net);
- return -ENODEV;
- }
-
- priv = vnet_pdev->vdev.priv;
+ struct pci_dev *pdev_net;
+ struct virtio_pci_device *vp_dev;
+ struct virtio_pci_device *vnet_pdev;
+ void *priv;
+ struct net_device *netdev;
+
+ if (!vrdev || !vrdev->vdev) {
+ pr_err("%s: invalid vrdev or vdev\n", __func__);
+ return -EINVAL;
+ }
+
+ vp_dev = to_vp_device(vrdev->vdev);
+
+ /* Find the PCI device at function 0 of the same slot */
+ pdev_net = pci_get_slot(vp_dev->pci_dev->bus,
+ PCI_DEVFN(PCI_SLOT(vp_dev->pci_dev->devfn), 0));
+ if (!pdev_net) {
+ pr_err("Failed to find PCI device at fn=0 of slot %x\n",
+ PCI_SLOT(vp_dev->pci_dev->devfn));
+ return -ENODEV;
+ }
+
+ /* Optional: Validate it's a known virtio-net device */
+ if (pdev_net->vendor != PCI_VENDOR_ID_REDHAT_QUMRANET ||
+ pdev_net->device != 0x1041) {
+ pr_warn("PCI device %04x:%04x is not expected virtio-net (1041) device\n",
+ pdev_net->vendor, pdev_net->device);
+ pci_dev_put(pdev_net);
+ return -ENODEV;
+ }
+
+ /* Get the virtio_pci_device from drvdata */
+ vnet_pdev = pci_get_drvdata(pdev_net);
+ if (!vnet_pdev || !vnet_pdev->vdev.priv) {
+ pr_err("No driver data or priv for virtio-net device\n");
+ pci_dev_put(pdev_net);
+ return -ENODEV;
+ }
+
+ priv = vnet_pdev->vdev.priv;
vrdev->netdev = priv - ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
- netdev = vrdev->netdev;
+ netdev = vrdev->netdev;
- if (!netdev || !netdev->netdev_ops) {
- pr_err("Invalid net_device retrieved from virtio-net\n");
- pci_dev_put(pdev_net);
- return -ENODEV;
- }
+ if (!netdev || !netdev->netdev_ops) {
+ pr_err("Invalid net_device retrieved from virtio-net\n");
+ pci_dev_put(pdev_net);
+ return -ENODEV;
+ }
- /* Hold reference so netdev won't disappear */
- dev_hold(netdev);
+ /* Hold reference so netdev won't disappear */
+ dev_hold(netdev);
- pci_dev_put(pdev_net); /* Release reference from pci_get_slot */
+ pci_dev_put(pdev_net); /* Release reference from pci_get_slot */
- return 0;
+ return 0;
}
/**
@@ -88,18 +88,18 @@ int vrdma_init_netdev(struct vrdma_dev *vrdev)
*/
void vrdma_finish_netdev(struct vrdma_dev *vrdev)
{
- if (!vrdev) {
- pr_err("%s: invalid vrdev pointer\n", __func__);
- return;
- }
-
- if (vrdev->netdev) {
- pr_info("[%s]: Releasing reference to net_device '%s'\n",
- __func__, vrdev->netdev->name);
-
- dev_put(vrdev->netdev);
- vrdev->netdev = NULL;
- } else {
- pr_debug("%s: no netdev to release\n", __func__);
- }
+ if (!vrdev) {
+ pr_err("%s: invalid vrdev pointer\n", __func__);
+ return;
+ }
+
+ if (vrdev->netdev) {
+ pr_info("[%s]: Releasing reference to net_device '%s'\n",
+ __func__, vrdev->netdev->name);
+
+ dev_put(vrdev->netdev);
+ vrdev->netdev = NULL;
+ } else {
+ pr_debug("%s: no netdev to release\n", __func__);
+ }
}
\ No newline at end of file
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c
index 78779c243..57c635aaf 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.c
@@ -18,4 +18,114 @@ void vrdma_cq_ack(struct virtqueue *vq)
if (vcq && vcq->ibcq.comp_handler)
vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+}
+
+/**
+ * vrdma_qp_check_cap - Validate QP capacity limits against device attributes
+ * @vdev: Pointer to the virtio-rdma device
+ * @cap: User-requested QP capabilities
+ * @has_srq: Whether this QP is associated with a Shared Receive Queue (SRQ)
+ *
+ * Checks that the requested QP capacities (WQEs, SGEs) do not exceed
+ * device limits. Inline data limit is currently ignored.
+ *
+ * Return:
+ * 0 if all constraints are satisfied, -EINVAL otherwise.
+ */
+static int vrdma_qp_check_cap(struct vrdma_dev *vdev,
+ const struct ib_qp_cap *cap, bool has_srq)
+{
+ if (cap->max_send_wr > vdev->attr.max_qp_wr) {
+ dev_warn(&vdev->vdev->dev,
+ "invalid max_send_wr = %u > %u\n",
+ cap->max_send_wr, vdev->attr.max_qp_wr);
+ return -EINVAL;
+ }
+
+ if (cap->max_send_sge > vdev->attr.max_send_sge) {
+ dev_warn(&vdev->vdev->dev,
+ "invalid max_send_sge = %u > %u\n",
+ cap->max_send_sge, vdev->attr.max_send_sge);
+ return -EINVAL;
+ }
+
+ /* Only check receive queue parameters if no SRQ is used */
+ if (!has_srq) {
+ if (cap->max_recv_wr > vdev->attr.max_qp_wr) {
+ dev_warn(&vdev->vdev->dev,
+ "invalid max_recv_wr = %u > %u\n",
+ cap->max_recv_wr, vdev->attr.max_qp_wr);
+ return -EINVAL;
+ }
+
+ if (cap->max_recv_sge > vdev->attr.max_recv_sge) {
+ dev_warn(&vdev->vdev->dev,
+ "invalid max_recv_sge = %u > %u\n",
+ cap->max_recv_sge, vdev->attr.max_recv_sge);
+ return -EINVAL;
+ }
+ }
+
+	/* TODO: Add check for inline data: cap->max_inline_data <= vdev->attr.max_inline_data */
+
+ return 0;
+}
+
+/**
+ * vrdma_qp_check_init - Validate QP initialization attributes
+ * @vdev: The virtual RDMA device
+ * @init: QP initialization attributes from user/kernel space
+ *
+ * Performs semantic validation of QP creation parameters including:
+ * - Supported QP types
+ * - Valid CQ bindings
+ * - Port number validity for special QP types
+ * - Capacity limits via vrdma_qp_check_cap()
+ *
+ * Return:
+ * 0 on success, negative errno code on failure.
+ */
+int vrdma_qp_check_init(struct vrdma_dev *vdev,
+ const struct ib_qp_init_attr *init)
+{
+ const struct ib_qp_cap *cap = &init->cap;
+ u8 port_num = init->port_num;
+ int ret;
+
+ /* Check supported QP types */
+ switch (init->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ break; /* Supported */
+ default:
+ dev_dbg(&vdev->vdev->dev,
+ "QP type %d not supported\n", init->qp_type);
+ return -EOPNOTSUPP;
+ }
+
+ /* Send and receive CQs are required unless using SRQ-only recv path */
+ if (!init->send_cq || !init->recv_cq) {
+ dev_warn(&vdev->vdev->dev,
+ "missing send or recv completion queue\n");
+ return -EINVAL;
+ }
+
+ /* Validate QP capacity limits */
+ ret = vrdma_qp_check_cap(vdev, cap, !!init->srq);
+ if (ret)
+ return ret;
+
+ /* For SMI/GSI QPs, ensure port number is valid */
+ if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
+ if (!rdma_is_port_valid(&vdev->ib_dev, port_num)) {
+ dev_warn(&vdev->vdev->dev,
+ "invalid port number %u\n", port_num);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
}
\ No newline at end of file
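
For context, a hedged sketch of how vrdma_qp_check_init() would typically be
invoked from the create_qp verb; the surrounding create path (including the
function name used here) is an assumption, not part of this patch:

	#include "vrdma_ib.h"
	#include "vrdma_queue.h"

	/* Sketch only: validate attributes before committing any resources.
	 * Ring allocation and the backend create command are assumed to
	 * follow and are omitted.
	 */
	static int vrdma_create_qp_sketch(struct ib_qp *ibqp,
					  struct ib_qp_init_attr *init_attr,
					  struct ib_udata *udata)
	{
		struct vrdma_dev *vdev = container_of(ibqp->device,
						      struct vrdma_dev, ib_dev);
		struct vrdma_qp *vqp = to_vqp(ibqp);
		int rc;

		rc = vrdma_qp_check_init(vdev, init_attr);
		if (rc)
			return rc;

		vqp->type = init_attr->qp_type;
		vqp->port = init_attr->port_num ? init_attr->port_num : 1;

		/* ... allocate SQ/RQ rings, post the create command to the
		 * backend, and fill vqp->qp_handle here ...
		 */
		return 0;
	}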
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h
index 64b896208..a40c3762f 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_queue.h
@@ -10,5 +10,6 @@
#include "vrdma_dev_api.h"
void vrdma_cq_ack(struct virtqueue *vq);
-
+int vrdma_qp_check_init(struct vrdma_dev *vdev,
+ const struct ib_qp_init_attr *init);
#endif
\ No newline at end of file
diff --git a/linux-6.16.8/include/rdma/ib_verbs.h b/linux-6.16.8/include/rdma/ib_verbs.h
index 6353da1c0..a129f1cc9 100644
--- a/linux-6.16.8/include/rdma/ib_verbs.h
+++ b/linux-6.16.8/include/rdma/ib_verbs.h
@@ -659,6 +659,7 @@ void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats);
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
+#define RDMA_CORE_CAP_PROT_VIRTIO 0x04000000
#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
| RDMA_CORE_CAP_PROT_ROCE \
@@ -690,6 +691,14 @@ void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats);
#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
+/* In most cases, RDMA_CORE_PORT_VIRTIO is the same as RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP */
+#define RDMA_CORE_PORT_VIRTIO \
+ (RDMA_CORE_CAP_PROT_VIRTIO \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
+
struct ib_port_attr {
u64 subnet_prefix;
enum ib_port_state state;
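
The new RDMA_CORE_PORT_VIRTIO capability set is consumed through the usual
get_port_immutable path; a minimal sketch, assuming the virtio port otherwise
behaves like a RoCE-style Ethernet port (the function name is illustrative):

	#include <rdma/ib_verbs.h>
	#include <rdma/ib_mad.h>

	/* Sketch only: advertise the virtio protocol capabilities to the core.
	 * Table sizes are taken from the queried port attributes.
	 */
	static int vrdma_port_immutable_sketch(struct ib_device *ibdev, u32 port_num,
					       struct ib_port_immutable *immutable)
	{
		struct ib_port_attr attr;
		int rc;

		rc = ib_query_port(ibdev, port_num, &attr);
		if (rc)
			return rc;

		immutable->core_cap_flags = RDMA_CORE_PORT_VIRTIO;
		immutable->pkey_tbl_len   = attr.pkey_tbl_len;
		immutable->gid_tbl_len    = attr.gid_tbl_len;
		immutable->max_mad_size   = IB_MGMT_MAD_SIZE;

		return 0;
	}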
--
2.43.0