lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YvtSt0DOAnqCdu6G@TonyMac-Alibaba>
Date:   Tue, 16 Aug 2022 16:17:59 +0800
From:   Tony Lu <tonylu@...ux.alibaba.com>
To:     "D. Wythe" <alibuda@...ux.alibaba.com>
Cc:     kgraul@...ux.ibm.com, wenjia@...ux.ibm.com, kuba@...nel.org,
        davem@...emloft.net, netdev@...r.kernel.org,
        linux-s390@...r.kernel.org, linux-rdma@...r.kernel.org
Subject: Re: [PATCH net-next 03/10] net/smc: allow confirm/delete rkey
 response deliver multiplex

On Thu, Aug 11, 2022 at 01:47:34AM +0800, D. Wythe wrote:
> From: "D. Wythe" <alibuda@...ux.alibaba.com>
> 
> We know that all flows except confirm_rkey and delete_rkey are exclusive,
> confirm/delete rkey flows can run concurrently (local and remote).
> 
> Although the protocol allows, all flows are actually mutually exclusive
> in implementation, dues to waiting for LLC messages is in serial.
> 
> This aggravates the time for establishing or destroying a SMC-R
> connections, connections have to be queued in smc_llc_wait.
> 
> We use rtokens or rkey to correlate a confirm/delete rkey message
> with its response.
> 
> Before sending a message, we put context with rtokens or rkey into
> wait queue. When a response message received, we wakeup the context
> which with the same rtokens or rkey against the response message.
> 
> Signed-off-by: D. Wythe <alibuda@...ux.alibaba.com>
> ---
>  net/smc/smc_llc.c | 174 +++++++++++++++++++++++++++++++++++++++++-------------
>  net/smc/smc_wr.c  |  10 ----
>  net/smc/smc_wr.h  |  10 ++++
>  3 files changed, 143 insertions(+), 51 deletions(-)
> 
> diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
> index 8134c15..b026df2 100644
> --- a/net/smc/smc_llc.c
> +++ b/net/smc/smc_llc.c
> @@ -200,6 +200,7 @@ struct smc_llc_msg_delete_rkey_v2 {	/* type 0x29 */
>  struct smc_llc_qentry {
>  	struct list_head list;
>  	struct smc_link *link;
> +	void *private;
>  	union smc_llc_msg msg;
>  };
>  
> @@ -479,19 +480,17 @@ int smc_llc_send_confirm_link(struct smc_link *link,
>  	return rc;
>  }
>  
> -/* send LLC confirm rkey request */
> -static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
> -				     struct smc_buf_desc *rmb_desc)
> +/* build LLC confirm rkey request */
> +static int smc_llc_build_confirm_rkey_request(struct smc_link *send_link,
> +					      struct smc_buf_desc *rmb_desc,
> +					      struct smc_wr_tx_pend_priv **priv)
>  {
>  	struct smc_llc_msg_confirm_rkey *rkeyllc;
> -	struct smc_wr_tx_pend_priv *pend;
>  	struct smc_wr_buf *wr_buf;
>  	struct smc_link *link;
>  	int i, rc, rtok_ix;
>  
> -	if (!smc_wr_tx_link_hold(send_link))
> -		return -ENOLINK;
> -	rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
> +	rc = smc_llc_add_pending_send(send_link, &wr_buf, priv);
>  	if (rc)
>  		goto put_out;
>  	rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
> @@ -521,25 +520,20 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
>  		cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
>  		cpu_to_be64((u64)sg_dma_address
>  			    (rmb_desc->sgt[send_link->link_idx].sgl));
> -	/* send llc message */
> -	rc = smc_wr_tx_send(send_link, pend);
>  put_out:
> -	smc_wr_tx_link_put(send_link);
>  	return rc;
>  }
>  
> -/* send LLC delete rkey request */
> -static int smc_llc_send_delete_rkey(struct smc_link *link,
> -				    struct smc_buf_desc *rmb_desc)
> +/* build LLC delete rkey request */
> +static int smc_llc_build_delete_rkey_request(struct smc_link *link,
> +					     struct smc_buf_desc *rmb_desc,
> +					     struct smc_wr_tx_pend_priv **priv)
>  {
>  	struct smc_llc_msg_delete_rkey *rkeyllc;
> -	struct smc_wr_tx_pend_priv *pend;
>  	struct smc_wr_buf *wr_buf;
>  	int rc;
>  
> -	if (!smc_wr_tx_link_hold(link))
> -		return -ENOLINK;
> -	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
> +	rc = smc_llc_add_pending_send(link, &wr_buf, priv);
>  	if (rc)
>  		goto put_out;
>  	rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
> @@ -548,10 +542,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
>  	smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
>  	rkeyllc->num_rkeys = 1;
>  	rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey);
> -	/* send llc message */
> -	rc = smc_wr_tx_send(link, pend);
>  put_out:
> -	smc_wr_tx_link_put(link);
>  	return rc;
>  }
>  
> @@ -2023,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link,
>  	case SMC_LLC_DELETE_RKEY:
>  		if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry)
>  			break;	/* drop out-of-flow response */
> -		goto assign;
> +		__wake_up(&link->lgr->llc_msg_waiter, TASK_NORMAL, 1, qentry);
> +		goto free;
>  	case SMC_LLC_CONFIRM_RKEY_CONT:
>  		/* not used because max links is 3 */
>  		break;
> @@ -2032,6 +2024,7 @@ static void smc_llc_rx_response(struct smc_link *link,
>  					   qentry->msg.raw.hdr.common.type);
>  		break;
>  	}
> +free:
>  	kfree(qentry);
>  	return;
>  assign:
> @@ -2191,25 +2184,98 @@ void smc_llc_link_clear(struct smc_link *link, bool log)
>  	cancel_delayed_work_sync(&link->llc_testlink_wrk);
>  }
>  
> +static int smc_llc_rkey_response_wake_function(struct wait_queue_entry *wq_entry,
> +					       unsigned int mode, int sync, void *key)
> +{
> +	struct smc_llc_qentry *except, *incoming;
> +	u8 except_llc_type;
> +
> +	/* not a rkey response */
> +	if (!key)
> +		return 0;
> +
> +	except = wq_entry->private;
> +	incoming = key;
> +
> +	except_llc_type = except->msg.raw.hdr.common.llc_type;
> +
> +	/* except LLC MSG TYPE mismatch */
> +	if (except_llc_type != incoming->msg.raw.hdr.common.llc_type)
> +		return 0;
> +
> +	switch (except_llc_type) {
> +	case SMC_LLC_CONFIRM_RKEY:
> +		if (memcmp(except->msg.confirm_rkey.rtoken, incoming->msg.confirm_rkey.rtoken,
> +			   sizeof(struct smc_rmb_rtoken) *
> +			   except->msg.confirm_rkey.rtoken[0].num_rkeys))
> +			return 0;
> +		break;
> +	case SMC_LLC_DELETE_RKEY:
> +		if (memcmp(except->msg.delete_rkey.rkey, incoming->msg.delete_rkey.rkey,
> +			   sizeof(__be32) * except->msg.delete_rkey.num_rkeys))
> +			return 0;
> +		break;
> +	default:
> +		panic("invalid except llc msg %d", except_llc_type);

Replace panic with pr_warn?

> +		return 0;
> +	}
> +
> +	/* match, save hdr */
> +	memcpy(&except->msg.raw.hdr, &incoming->msg.raw.hdr, sizeof(except->msg.raw.hdr));
> +
> +	wq_entry->private = except->private;
> +	return woken_wake_function(wq_entry, mode, sync, NULL);
> +}
> +
>  /* register a new rtoken at the remote peer (for all links) */
>  int smc_llc_do_confirm_rkey(struct smc_link *send_link,
>  			    struct smc_buf_desc *rmb_desc)
>  {
> +	long timeout = SMC_LLC_WAIT_TIME;

Reverse Christmas trees.

>  	struct smc_link_group *lgr = send_link->lgr;
> -	struct smc_llc_qentry *qentry = NULL;
> -	int rc = 0;
> +	struct smc_llc_qentry qentry;
> +	struct smc_wr_tx_pend *pend;
> +	struct smc_wr_tx_pend_priv *priv;
> +	DEFINE_WAIT_FUNC(wait, smc_llc_rkey_response_wake_function);
> +	int rc = 0, flags;

Ditto.

>  
> -	rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
> +	if (!smc_wr_tx_link_hold(send_link))
> +		return -ENOLINK;
> +
> +	rc = smc_llc_build_confirm_rkey_request(send_link, rmb_desc, &priv);
>  	if (rc)
>  		goto out;
> -	/* receive CONFIRM RKEY response from server over RoCE fabric */
> -	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
> -			      SMC_LLC_CONFIRM_RKEY);
> -	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
> +
> +	pend = container_of(priv, struct smc_wr_tx_pend, priv);
> +	/* make a copy of send msg */
> +	memcpy(&qentry.msg.confirm_rkey, send_link->wr_tx_bufs[pend->idx].raw,
> +	       sizeof(qentry.msg.confirm_rkey));
> +
> +	qentry.private = wait.private;
> +	wait.private = &qentry;
> +
> +	add_wait_queue(&lgr->llc_msg_waiter, &wait);
> +
> +	/* send llc message */
> +	rc = smc_wr_tx_send(send_link, priv);
> +	smc_wr_tx_link_put(send_link);
> +	if (rc) {
> +		remove_wait_queue(&lgr->llc_msg_waiter, &wait);
> +		goto out;
> +	}
> +
> +	while (!signal_pending(current) && timeout) {
> +		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
> +		if (qentry.msg.raw.hdr.flags & SMC_LLC_FLAG_RESP)
> +			break;
> +	}
> +
> +	remove_wait_queue(&lgr->llc_msg_waiter, &wait);
> +	flags = qentry.msg.raw.hdr.flags;
> +
> +	if (!(flags & SMC_LLC_FLAG_RESP) || flags & SMC_LLC_FLAG_RKEY_NEG)
>  		rc = -EFAULT;
>  out:
> -	if (qentry)
> -		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
>  	return rc;
>  }
>  
> @@ -2217,26 +2283,52 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link,
>  int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
>  			   struct smc_buf_desc *rmb_desc)
>  {
> -	struct smc_llc_qentry *qentry = NULL;
> +	long timeout = SMC_LLC_WAIT_TIME;
> +	struct smc_llc_qentry qentry;
> +	struct smc_wr_tx_pend *pend;
>  	struct smc_link *send_link;
> -	int rc = 0;
> +	struct smc_wr_tx_pend_priv *priv;

Reverse Christmas trees.

> +	DEFINE_WAIT_FUNC(wait, smc_llc_rkey_response_wake_function);
> +	int rc = 0, flags;
>  
>  	send_link = smc_llc_usable_link(lgr);
> -	if (!send_link)
> +	if (!send_link || !smc_wr_tx_link_hold(send_link))
>  		return -ENOLINK;
>  
> -	/* protected by llc_flow control */
> -	rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
> +	rc = smc_llc_build_delete_rkey_request(send_link, rmb_desc, &priv);
>  	if (rc)
>  		goto out;
> -	/* receive DELETE RKEY response from server over RoCE fabric */
> -	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
> -			      SMC_LLC_DELETE_RKEY);
> -	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
> +
> +	pend = container_of(priv, struct smc_wr_tx_pend, priv);
> +	/* make a copy of send msg */
> +	memcpy(&qentry.msg.delete_link, send_link->wr_tx_bufs[pend->idx].raw,
> +	       sizeof(qentry.msg.delete_link));
> +
> +	qentry.private = wait.private;
> +	wait.private = &qentry;
> +
> +	add_wait_queue(&lgr->llc_msg_waiter, &wait);
> +
> +	/* send llc message */
> +	rc = smc_wr_tx_send(send_link, priv);
> +	smc_wr_tx_link_put(send_link);
> +	if (rc) {
> +		remove_wait_queue(&lgr->llc_msg_waiter, &wait);
> +		goto out;
> +	}
> +
> +	while (!signal_pending(current) && timeout) {
> +		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
> +		if (qentry.msg.raw.hdr.flags & SMC_LLC_FLAG_RESP)
> +			break;
> +	}
> +
> +	remove_wait_queue(&lgr->llc_msg_waiter, &wait);
> +	flags = qentry.msg.raw.hdr.flags;
> +
> +	if (!(flags & SMC_LLC_FLAG_RESP) || flags & SMC_LLC_FLAG_RKEY_NEG)
>  		rc = -EFAULT;
>  out:
> -	if (qentry)
> -		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
>  	return rc;
>  }
>  
> diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
> index 26f8f24..52af94f 100644
> --- a/net/smc/smc_wr.c
> +++ b/net/smc/smc_wr.c
> @@ -37,16 +37,6 @@
>  static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
>  static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);
>  
> -struct smc_wr_tx_pend {	/* control data for a pending send request */
> -	u64			wr_id;		/* work request id sent */
> -	smc_wr_tx_handler	handler;
> -	enum ib_wc_status	wc_status;	/* CQE status */
> -	struct smc_link		*link;
> -	u32			idx;
> -	struct smc_wr_tx_pend_priv priv;
> -	u8			compl_requested;
> -};
> -
>  /******************************** send queue *********************************/
>  
>  /*------------------------------- completion --------------------------------*/
> diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
> index a54e90a..9946ed5 100644
> --- a/net/smc/smc_wr.h
> +++ b/net/smc/smc_wr.h
> @@ -46,6 +46,16 @@ struct smc_wr_rx_handler {
>  	u8			type;
>  };
>  
> +struct smc_wr_tx_pend {	/* control data for a pending send request */
> +	u64			wr_id;		/* work request id sent */
> +	smc_wr_tx_handler	handler;
> +	enum ib_wc_status	wc_status;	/* CQE status */
> +	struct smc_link		*link;
> +	u32			idx;
> +	struct smc_wr_tx_pend_priv priv;
> +	u8			compl_requested;
> +};
> +
>  /* Only used by RDMA write WRs.
>   * All other WRs (CDC/LLC) use smc_wr_tx_send handling WR_ID implicitly
>   */
> -- 
> 1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ