[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251113213712.776234-3-zhipingz@meta.com>
Date: Thu, 13 Nov 2025 13:37:12 -0800
From: Zhiping Zhang <zhipingz@...a.com>
To: Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>,
Bjorn Helgaas <bhelgaas@...gle.com>, <linux-rdma@...r.kernel.org>,
<linux-pci@...r.kernel.org>, <netdev@...r.kernel.org>,
Keith Busch <kbusch@...nel.org>, Yochai Cohen <yochai@...dia.com>,
Yishai Hadas <yishaih@...dia.com>
CC: Zhiping Zhang <zhipingz@...a.com>
Subject: [RFC 2/2] Set steering-tag directly for PCIe P2P memory access
RDMA: Set steering-tag value directly in DMAH struct for DMABUF MR
This patch enables construction of a DMA handle (DMAH) with the P2P memory type
and a direct steering-tag value. It can be used to register an RDMA memory
region with DMABUF so that the RDMA NIC can access another device's memory via P2P.
Signed-off-by: Zhiping Zhang <zhipingz@...a.com>
---
.../infiniband/core/uverbs_std_types_dmah.c | 28 +++++++++++++++++++
drivers/infiniband/core/uverbs_std_types_mr.c | 3 ++
drivers/infiniband/hw/mlx5/dmah.c | 5 ++--
.../net/ethernet/mellanox/mlx5/core/lib/st.c | 12 +++++---
include/linux/mlx5/driver.h | 4 +--
include/rdma/ib_verbs.h | 2 ++
include/uapi/rdma/ib_user_ioctl_cmds.h | 1 +
7 files changed, 46 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_std_types_dmah.c b/drivers/infiniband/core/uverbs_std_types_dmah.c
index 453ce656c6f2..1ef400f96965 100644
--- a/drivers/infiniband/core/uverbs_std_types_dmah.c
+++ b/drivers/infiniband/core/uverbs_std_types_dmah.c
@@ -61,6 +61,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)(
dmah->valid_fields |= BIT(IB_DMAH_MEM_TYPE_EXISTS);
}
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_DIRECT_ST_VAL)) {
+ ret = uverbs_copy_from(&dmah->direct_st_val, attrs,
+ UVERBS_ATTR_ALLOC_DMAH_DIRECT_ST_VAL);
+ if (ret)
+ goto err;
+
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ if ((dmah->valid_fields & BIT(IB_DMAH_MEM_TYPE_EXISTS)) == 0) {
+ ret = -EINVAL;
+ goto err;
+ }
+ if (dmah->mem_type != TPH_MEM_TYPE_P2P) {
+ ret = -EINVAL;
+ goto err;
+ }
+ dmah->valid_fields |= BIT(IB_DMAH_DIRECT_ST_VAL_EXISTS);
+ }
+
if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_PH)) {
ret = uverbs_copy_from(&dmah->ph, attrs,
UVERBS_ATTR_ALLOC_DMAH_PH);
@@ -107,6 +128,10 @@ static const struct uverbs_attr_spec uverbs_dmah_mem_type[] = {
.type = UVERBS_ATTR_TYPE_PTR_IN,
UVERBS_ATTR_NO_DATA(),
},
+ [TPH_MEM_TYPE_P2P] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
};
DECLARE_UVERBS_NAMED_METHOD(
@@ -123,6 +148,9 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_PH,
UVERBS_ATTR_TYPE(u8),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_DIRECT_ST_VAL,
+ UVERBS_ATTR_TYPE(u16),
UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 570b9656801d..10e47934898e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -346,6 +346,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
UVERBS_ATTR_REG_MR_DMA_HANDLE);
if (IS_ERR(dmah))
return PTR_ERR(dmah);
+ if (dmah->mem_type == TPH_MEM_TYPE_P2P && has_fd == false) {
+ return -EINVAL;
+ }
}
ret = uverbs_get_flags32(&access_flags, attrs,
diff --git a/drivers/infiniband/hw/mlx5/dmah.c b/drivers/infiniband/hw/mlx5/dmah.c
index 362a88992ffa..98c8d3313653 100644
--- a/drivers/infiniband/hw/mlx5/dmah.c
+++ b/drivers/infiniband/hw/mlx5/dmah.c
@@ -15,8 +15,7 @@ static int mlx5_ib_alloc_dmah(struct ib_dmah *ibdmah,
{
struct mlx5_core_dev *mdev = to_mdev(ibdmah->device)->mdev;
struct mlx5_ib_dmah *dmah = to_mdmah(ibdmah);
- u16 st_bits = BIT(IB_DMAH_CPU_ID_EXISTS) |
- BIT(IB_DMAH_MEM_TYPE_EXISTS);
+ u16 st_bits = BIT(IB_DMAH_MEM_TYPE_EXISTS);
int err;
/* PH is a must for TPH following PCIe spec 6.2-1.0 */
@@ -28,7 +27,7 @@ static int mlx5_ib_alloc_dmah(struct ib_dmah *ibdmah,
if ((ibdmah->valid_fields & st_bits) != st_bits)
return -EINVAL;
err = mlx5_st_alloc_index(mdev, ibdmah->mem_type,
- ibdmah->cpu_id, &dmah->st_index);
+ ibdmah->cpu_id, &dmah->st_index, ibdmah->direct_st_val);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c
index 47fe215f66bf..690ad8536128 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c
@@ -80,7 +80,7 @@ void mlx5_st_destroy(struct mlx5_core_dev *dev)
}
int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
- unsigned int cpu_uid, u16 *st_index)
+ unsigned int cpu_uid, u16 *st_index, u16 direct_st_val)
{
struct mlx5_st_idx_data *idx_data;
struct mlx5_st *st = dev->st;
@@ -92,9 +92,13 @@ int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
if (!st)
return -EOPNOTSUPP;
- ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag);
- if (ret)
- return ret;
+ if (mem_type == TPH_MEM_TYPE_P2P)
+ tag = direct_st_val;
+ else {
+ ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag);
+ if (ret)
+ return ret;
+ }
mutex_lock(&st->lock);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1c8ba601e760..a58be1f2844b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1170,12 +1170,12 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
#ifdef CONFIG_PCIE_TPH
int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
- unsigned int cpu_uid, u16 *st_index);
+ unsigned int cpu_uid, u16 *st_index, u16 direct_st_val);
int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index);
#else
static inline int mlx5_st_alloc_index(struct mlx5_core_dev *dev,
enum tph_mem_type mem_type,
- unsigned int cpu_uid, u16 *st_index)
+ unsigned int cpu_uid, u16 *st_index, u16 direct_st_val)
{
return -EOPNOTSUPP;
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 465b73d94f33..30a26b524f03 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1852,6 +1852,7 @@ enum {
IB_DMAH_CPU_ID_EXISTS,
IB_DMAH_MEM_TYPE_EXISTS,
IB_DMAH_PH_EXISTS,
+ IB_DMAH_DIRECT_ST_VAL_EXISTS,
};
struct ib_dmah {
@@ -1866,6 +1867,7 @@ struct ib_dmah {
atomic_t usecnt;
u8 ph;
u8 valid_fields; /* use IB_DMAH_XXX_EXISTS */
+ u16 direct_st_val;
};
struct ib_mr {
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 17f963014eca..42b3892b6761 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -242,6 +242,7 @@ enum uverbs_attrs_alloc_dmah_cmd_attr_ids {
UVERBS_ATTR_ALLOC_DMAH_CPU_ID,
UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE,
UVERBS_ATTR_ALLOC_DMAH_PH,
+ UVERBS_ATTR_ALLOC_DMAH_DIRECT_ST_VAL,
};
enum uverbs_attrs_free_dmah_cmd_attr_ids {
--
2.47.3
Powered by blists - more mailing lists