Message-ID: <20241201103659.420677-5-ymaman@nvidia.com>
Date: Sun, 1 Dec 2024 12:36:58 +0200
From: Yonatan Maman <ymaman@...dia.com>
To: <kherbst@...hat.com>, <lyude@...hat.com>, <dakr@...hat.com>,
<airlied@...il.com>, <simona@...ll.ch>, <jgg@...pe.ca>, <leon@...nel.org>,
<jglisse@...hat.com>, <akpm@...ux-foundation.org>, <Ymaman@...dia.com>,
<GalShalom@...dia.com>, <dri-devel@...ts.freedesktop.org>,
<nouveau@...ts.freedesktop.org>, <linux-kernel@...r.kernel.org>,
<linux-rdma@...r.kernel.org>, <linux-mm@...ck.org>,
<linux-tegra@...r.kernel.org>
Subject: [RFC 4/5] RDMA/mlx5: Add fallback for P2P DMA errors
From: Yonatan Maman <Ymaman@...dia.com>
Handle P2P DMA mapping errors when the transaction would have to traverse
a host bridge that does not support P2P DMA and is not in the allowlist:
- In `populate_mtt`, if a P2P mapping fails, the `HMM_PFN_ALLOW_P2P` flag
is cleared only for the PFNs that returned a mapping error.
- In `pagefault_real_mr`, if a P2P mapping error occurs, the mapping is
retried with the `HMM_PFN_ALLOW_P2P` flag set only for the PFNs that did
not fail, falling back to standard DMA (host memory) for the rest where
possible.
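In pseudocode, the fallback flow in `pagefault_real_mr` looks like this
(a minimal sketch; map_and_update() is an illustrative placeholder that
stands in for the ib_umem_odp_map_dma_and_lock() plus
mlx5r_umr_update_xlt() sequence, not a real API):

    access_mask |= HMM_PFN_ALLOW_P2P;       /* prefer P2P mappings first */
    ret = map_and_update(odp, access_mask);
    if (ret == -EFAULT) {
            /* some PFNs failed P2P mapping; retry them as host DMA */
            access_mask &= ~HMM_PFN_ALLOW_P2P;
            ret = map_and_update(odp, access_mask);
    }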
Signed-off-by: Yonatan Maman <Ymaman@...dia.com>
Signed-off-by: Gal Shalom <GalShalom@...dia.com>
---
drivers/infiniband/hw/mlx5/odp.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index fbb2a5670c32..f7a1291ec7d1 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -169,6 +169,7 @@ static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
struct pci_p2pdma_map_state p2pdma_state = {};
struct ib_device *dev = odp->umem.ibdev;
size_t i;
+ int ret = 0;
if (flags & MLX5_IB_UPD_XLT_ZAP)
return 0;
@@ -184,8 +185,11 @@ static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
dma_addr = hmm_dma_map_pfn(dev->dma_device, &odp->map,
start + i, &p2pdma_state);
- if (ib_dma_mapping_error(dev, dma_addr))
- return -EFAULT;
+ if (ib_dma_mapping_error(dev, dma_addr)) {
+ odp->map.pfn_list[start + i] &= ~(HMM_PFN_ALLOW_P2P);
+ ret = -EFAULT;
+ continue;
+ }
dma_addr |= MLX5_IB_MTT_READ;
if ((pfn & HMM_PFN_WRITE) && !downgrade)
@@ -194,7 +198,7 @@ static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
pas[i] = cpu_to_be64(dma_addr);
odp->npages++;
}
- return 0;
+ return ret;
}
int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
@@ -696,6 +700,10 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
if (odp->umem.writable && !downgrade)
access_mask |= HMM_PFN_WRITE;
+ /*
+ * First, try to fault with the HMM_PFN_ALLOW_P2P flag set
+ */
+ access_mask |= HMM_PFN_ALLOW_P2P;
np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
if (np < 0)
return np;
@@ -705,6 +713,16 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
* ib_umem_odp_map_dma_and_lock already checks this.
*/
ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
+ if (ret == -EFAULT) {
+ /*
+ * -EFAULT indicates a P2P mapping error; retry without HMM_PFN_ALLOW_P2P
+ */
+ access_mask &= ~HMM_PFN_ALLOW_P2P;
+ np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
+ if (np < 0)
+ return np;
+ ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
+ }
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
--
2.34.1