Message-ID: <c1c057ed-3519-fe9e-87bb-cbed534b4ab8@gmail.com>
Date:   Fri, 19 May 2023 12:10:06 -0500
From:   Bob Pearson <rpearsonhpe@...il.com>
To:     Daisuke Matsuda <matsuda-daisuke@...itsu.com>,
        linux-rdma@...r.kernel.org, leonro@...dia.com, jgg@...dia.com,
        zyjzyj2000@...il.com
Cc:     linux-kernel@...r.kernel.org, yangx.jy@...itsu.com,
        lizhijian@...itsu.com, y-goto@...itsu.com
Subject: Re: [PATCH for-next v5 6/7] RDMA/rxe: Add support for
 Send/Recv/Write/Read with ODP

On 5/18/23 03:21, Daisuke Matsuda wrote:
> rxe_mr_copy() is used widely to copy data to/from a user MR. The requester uses
> it to load payloads of requesting packets; the responder uses it to process
> Send, Write, and Read operations; and the completer uses it to copy data from
> response packets of Read and Atomic operations to a user MR.
> 
> Allow these operations to be used with ODP by adding a subordinate function
> rxe_odp_mr_copy(). It consists of the following steps:
>  1. Check the driver page table (umem_odp->dma_list) to see if the pages being
>     accessed are present with the appropriate permission.
>  2. If necessary, trigger a page fault to map the pages.
>  3. Update the MR xarray using the PFNs in umem_odp->pfn_list.
>  4. Copy the data to/from the pages.
> 
> umem_mutex is used to ensure that dma_list (an array of addresses of an MR)
> is not changed while it is being checked and that mapped pages are not
> invalidated before data copy completes.
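
(For readers less familiar with ODP, here is a minimal userspace sketch of how
these paths get exercised, assuming the standard libibverbs API; pd, qp, buf,
len, raddr and rkey are placeholders, not anything defined in this patch. With
IBV_ACCESS_ON_DEMAND nothing is pinned at registration time, so rxe has to
fault the pages in through the path added here when the payload is copied.)

	/* register an ODP-enabled MR, then post an RDMA WRITE from it */
	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
				       IBV_ACCESS_LOCAL_WRITE |
				       IBV_ACCESS_REMOTE_WRITE |
				       IBV_ACCESS_ON_DEMAND);
	if (!mr)
		return -1;	/* e.g. the device does not support ODP */

	struct ibv_sge sge = {
		.addr   = (uintptr_t)buf,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_send_wr wr = {
		.opcode     = IBV_WR_RDMA_WRITE,
		.sg_list    = &sge,
		.num_sge    = 1,
		.send_flags = IBV_SEND_SIGNALED,
		.wr.rdma    = { .remote_addr = raddr, .rkey = rkey },
	};
	struct ibv_send_wr *bad_wr;

	if (ibv_post_send(qp, &wr, &bad_wr))
		return -1;
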
> 
> Signed-off-by: Daisuke Matsuda <matsuda-daisuke@...itsu.com>
> ---
>  drivers/infiniband/sw/rxe/rxe.c     |  10 +++
>  drivers/infiniband/sw/rxe/rxe_loc.h |   8 ++
>  drivers/infiniband/sw/rxe/rxe_mr.c  |   2 +-
>  drivers/infiniband/sw/rxe/rxe_odp.c | 109 ++++++++++++++++++++++++++++
>  4 files changed, 128 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
> index f2284d27229b..207a022156f0 100644
> --- a/drivers/infiniband/sw/rxe/rxe.c
> +++ b/drivers/infiniband/sw/rxe/rxe.c
> @@ -79,6 +79,16 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
>  
>  		/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
>  		rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
> +
> +		rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
> +		rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_RECV;
> +		rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
> +
> +		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
> +		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
> +		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
> +		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
> +		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
>  	}
>  }
>  
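
(Side note: once these bits are advertised, userspace can check them before
attempting an ODP registration. A minimal sketch with the standard libibverbs
device query; ctx is a placeholder struct ibv_context pointer.)

	struct ibv_device_attr_ex attr;

	if (ibv_query_device_ex(ctx, NULL, &attr))
		return -1;

	if (!(attr.odp_caps.general_caps & IBV_ODP_SUPPORT))
		return 0;	/* no ODP support on this device */

	if (attr.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE) {
		/* RC RDMA WRITE to an ODP MR should be usable */
	}
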
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 93247d123642..4b95c8c46bdc 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -206,6 +206,8 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
>  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
>  int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
>  			 u64 iova, int access_flags, struct rxe_mr *mr);
> +int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
> +		    enum rxe_mr_copy_dir dir);
>  #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
>  static inline int
>  rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
> @@ -213,6 +215,12 @@ rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>  {
>  	return -EOPNOTSUPP;
>  }
> +static inline int
> +rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
> +		int length, enum rxe_mr_copy_dir dir)
> +{
> +	return -EOPNOTSUPP;
> +}
>  
>  #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
>  
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index cd368cd096c8..0e3cda59d702 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -319,7 +319,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
>  	}
>  
>  	if (mr->odp_enabled)
> -		return -EOPNOTSUPP;
> +		return rxe_odp_mr_copy(mr, iova, addr, length, dir);
>  	else
>  		return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
>  }
> diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
> index e5497d09c399..cbe5d0c3fcc4 100644
> --- a/drivers/infiniband/sw/rxe/rxe_odp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_odp.c
> @@ -174,3 +174,112 @@ int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
>  
>  	return err;
>  }
> +
> +static inline bool rxe_is_pagefault_necessary(struct ib_umem_odp *umem_odp,
> +					      u64 iova, int length, u32 perm)
> +{
> +	int idx;
> +	u64 addr;
> +	bool need_fault = false;
> +
> +	addr = iova & (~(BIT(umem_odp->page_shift) - 1));
> +
> +	/* Skim through all pages that are to be accessed. */
> +	while (addr < iova + length) {
> +		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
> +
> +		if (!(umem_odp->dma_list[idx] & perm)) {
> +			need_fault = true;
> +			break;
> +		}
> +
> +		addr += BIT(umem_odp->page_shift);
> +	}
> +	return need_fault;
> +}
> +
> +/* umem mutex must be locked before entering this function. */
> +static int rxe_odp_map_range(struct rxe_mr *mr, u64 iova, int length, u32 flags)
> +{
> +	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
> +	const int max_tries = 3;
> +	int cnt = 0;
> +
> +	int err;
> +	u64 perm;
> +	bool need_fault;
> +
> +	if (unlikely(length < 1)) {
> +		mutex_unlock(&umem_odp->umem_mutex);
> +		return -EINVAL;
> +	}
> +
> +	perm = ODP_READ_ALLOWED_BIT;
> +	if (!(flags & RXE_PAGEFAULT_RDONLY))
> +		perm |= ODP_WRITE_ALLOWED_BIT;
> +
> +	/*
> +	 * A successful return from rxe_odp_do_pagefault() does not guarantee
> +	 * that all pages in the range became present. Recheck the DMA address
> +	 * array, allowing max 3 tries for pagefault.
> +	 */
> +	while ((need_fault = rxe_is_pagefault_necessary(umem_odp,
> +							iova, length, perm))) {
> +		if (cnt >= max_tries)
> +			break;
> +
> +		mutex_unlock(&umem_odp->umem_mutex);
> +
> +		/* umem_mutex is locked on success. */
> +		err = rxe_odp_do_pagefault(mr, iova, length, flags);
> +		if (err < 0)
> +			return err;
> +
> +		cnt++;
> +	}
> +
> +	if (need_fault)
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
> +		    enum rxe_mr_copy_dir dir)
> +{
> +	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
> +	u32 flags = 0;
> +	int err;
> +
> +	if (unlikely(!mr->odp_enabled))
> +		return -EOPNOTSUPP;
> +
> +	switch (dir) {
> +	case RXE_TO_MR_OBJ:
> +		break;
> +
> +	case RXE_FROM_MR_OBJ:
> +		flags = RXE_PAGEFAULT_RDONLY;
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	/* If pagefault is not required, umem mutex will be held until data
> +	 * copy to the MR completes. Otherwise, it is released and locked
> +	 * again in rxe_odp_map_range() to let invalidation handler do its
> +	 * work meanwhile.
> +	 */
> +	mutex_lock(&umem_odp->umem_mutex);
> +
> +	err = rxe_odp_map_range(mr, iova, length, flags);
> +	if (err)
> +		return err;
> +
> +	err =  rxe_mr_copy_xarray(mr, iova, addr, length, dir);
> +
> +	mutex_unlock(&umem_odp->umem_mutex);
> +
> +	return err;
> +}

Reviewed-by: Bob Pearson <rpearsonhpe@...il.com>
