lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0e1656dc-b67c-ec65-83a4-6709fb186061@linux.dev>
Date: Mon, 15 May 2023 15:52:42 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: "D. Wythe" <alibuda@...ux.alibaba.com>
Cc: kuba@...nel.org, davem@...emloft.net, netdev@...r.kernel.org,
 linux-s390@...r.kernel.org, linux-rdma@...r.kernel.org, bpf@...r.kernel.org,
 kgraul@...ux.ibm.com, wenjia@...ux.ibm.com, jaka@...ux.ibm.com,
 ast@...nel.org, daniel@...earbox.net, andrii@...nel.org, pabeni@...hat.com,
 song@...nel.org, sdf@...gle.com, haoluo@...gle.com, yhs@...com,
 edumazet@...gle.com, john.fastabend@...il.com, kpsingh@...nel.org,
 jolsa@...nel.org, guwen@...ux.alibaba.com
Subject: Re: [PATCH bpf-next v1 2/5] net/smc: allow smc to negotiate protocols
 on policies

On 5/11/23 11:24 PM, D. Wythe wrote:
> From: "D. Wythe" <alibuda@...ux.alibaba.com>
> 
> As we all know, the SMC protocol is not suitable for all scenarios,
> especially for short-lived. However, for most applications, they cannot
> guarantee that there are no such scenarios at all. Therefore, apps
> may need some specific strategies to decide shall we need to use SMC
> or not.
> 
> Just like the congestion control implementation in TCP, this patch
> provides a generic negotiator implementation. If necessary,
> we can provide different protocol negotiation strategies for
> apps based on this implementation.
> 
> But most importantly, this patch provides the possibility of
> eBPF injection, allowing users to implement their own protocol
> negotiation policy in userspace.
> 
> Signed-off-by: D. Wythe <alibuda@...ux.alibaba.com>
> ---
>   include/net/smc.h        |  32 +++++++++++
>   net/Makefile             |   1 +
>   net/smc/Kconfig          |  11 ++++
>   net/smc/af_smc.c         | 134 ++++++++++++++++++++++++++++++++++++++++++++++-
>   net/smc/smc_negotiator.c | 119 +++++++++++++++++++++++++++++++++++++++++
>   net/smc/smc_negotiator.h | 116 ++++++++++++++++++++++++++++++++++++++++
>   6 files changed, 412 insertions(+), 1 deletion(-)
>   create mode 100644 net/smc/smc_negotiator.c
>   create mode 100644 net/smc/smc_negotiator.h
> 
> diff --git a/include/net/smc.h b/include/net/smc.h
> index 6d076f5..191061c 100644
> --- a/include/net/smc.h
> +++ b/include/net/smc.h
> @@ -296,6 +296,8 @@ struct smc_sock {				/* smc sock container */
>   	atomic_t                queued_smc_hs;  /* queued smc handshakes */
>   	struct inet_connection_sock_af_ops		af_ops;
>   	const struct inet_connection_sock_af_ops	*ori_af_ops;
> +	/* protocol negotiator ops */
> +	const struct smc_sock_negotiator_ops *negotiator_ops;
>   						/* original af ops */
>   	int			sockopt_defer_accept;
>   						/* sockopt TCP_DEFER_ACCEPT
> @@ -316,4 +318,34 @@ struct smc_sock {				/* smc sock container */
>   						 */
>   };
>   
> +#ifdef CONFIG_SMC_BPF
> +/* BPF struct ops for smc protocol negotiator */
> +struct smc_sock_negotiator_ops {
> +
> +	struct list_head	list;
> +
> +	/* ops name */
> +	char		name[16];
> +	/* key for name */
> +	u32			key;
> +
> +	/* init with sk */
> +	void (*init)(struct sock *sk);
> +
> +	/* release with sk */
> +	void (*release)(struct sock *sk);
> +
> +	/* advice for negotiate */
> +	int (*negotiate)(struct sock *sk);
> +
> +	/* info gathering timing */
> +	void (*collect_info)(struct sock *sk, int timing);
> +
> +	/* module owner */
> +	struct module *owner;
> +};
> +#else
> +struct smc_sock_negotiator_ops {};
> +#endif
> +
>   #endif	/* _SMC_H */
> diff --git a/net/Makefile b/net/Makefile
> index 4c4dc53..222916a 100644
> --- a/net/Makefile
> +++ b/net/Makefile
> @@ -52,6 +52,7 @@ obj-$(CONFIG_TIPC)		+= tipc/
>   obj-$(CONFIG_NETLABEL)		+= netlabel/
>   obj-$(CONFIG_IUCV)		+= iucv/
>   obj-$(CONFIG_SMC)		+= smc/
> +obj-$(CONFIG_SMC_BPF)		+= smc/smc_negotiator.o >   obj-$(CONFIG_RFKILL)		+= rfkill/
>   obj-$(CONFIG_NET_9P)		+= 9p/
>   obj-$(CONFIG_CAIF)		+= caif/
> diff --git a/net/smc/Kconfig b/net/smc/Kconfig
> index 1ab3c5a..bdcc9f1 100644
> --- a/net/smc/Kconfig
> +++ b/net/smc/Kconfig
> @@ -19,3 +19,14 @@ config SMC_DIAG
>   	  smcss.
>   
>   	  if unsure, say Y.
> +
> +config SMC_BPF
> +	bool "SMC: support eBPF" if SMC


so smc_negotiator will always be in the kernel image even af_smc is compiled as 
a module? If the SMC_BPF needs to support af_smc as a module, proper 
implementation needs to be added to bpf_struct_ops to support module first. It 
is work-in-progress.

> +	depends on BPF_SYSCALL
> +	default n
> +	help
> +	  Supports eBPF to allows user mode participation in SMC's protocol process
> +	  via ebpf programs. Alternatively, obtain information about the SMC socks
> +	  through the ebpf program.
> +
> +	  If unsure, say N.
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 50c38b6..7406fd4 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -52,6 +52,7 @@
>   #include "smc_close.h"
>   #include "smc_stats.h"
>   #include "smc_tracepoint.h"
> +#include "smc_negotiator.h"
>   #include "smc_sysctl.h"
>   
>   static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
> @@ -68,6 +69,119 @@
>   static void smc_tcp_listen_work(struct work_struct *);
>   static void smc_connect_work(struct work_struct *);
>   
> +#ifdef CONFIG_SMC_BPF
> +
> +/* Check if sock should use smc */
> +int smc_sock_should_select_smc(const struct smc_sock *smc)
> +{
> +	const struct smc_sock_negotiator_ops *ops;
> +	int ret;
> +
> +	rcu_read_lock();
> +	ops = READ_ONCE(smc->negotiator_ops);
> +
> +	/* No negotiator_ops supply or no negotiate func set,
> +	 * always pass it.
> +	 */
> +	if (!ops || !ops->negotiate) {

A smc_sock_negotiator_ops without ->negotiate? Is it useful at all to allow the 
register in the first place?

> +		rcu_read_unlock();
> +		return SK_PASS;
> +	}
> +
> +	ret = ops->negotiate((struct sock *)&smc->sk);
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +void smc_sock_perform_collecting_info(const struct smc_sock *smc, int timing)
> +{
> +	const struct smc_sock_negotiator_ops *ops;
> +
> +	rcu_read_lock();
> +	ops = READ_ONCE(smc->negotiator_ops);
> +
> +	if (!ops || !ops->collect_info) {
> +		rcu_read_unlock();
> +		return;
> +	}
> +
> +	ops->collect_info((struct sock *)&smc->sk, timing);
> +	rcu_read_unlock();
> +}
> +
> +int smc_sock_assign_negotiator_ops(struct smc_sock *smc, const char *name)
> +{
> +	struct smc_sock_negotiator_ops *ops;
> +	int ret = -EINVAL;
> +
> +	/* already set */
> +	if (READ_ONCE(smc->negotiator_ops))
> +		smc_sock_cleanup_negotiator_ops(smc, /* might be still referenced */ false);
> +
> +	/* Just for clear negotiator_ops */
> +	if (!name || !strlen(name))
> +		return 0;
> +
> +	rcu_read_lock();
> +	ops = smc_negotiator_ops_get_by_name(name);
> +	if (likely(ops)) {
> +		if (unlikely(!bpf_try_module_get(ops, ops->owner))) {
> +			ret = -EACCES;
> +		} else {
> +			WRITE_ONCE(smc->negotiator_ops, ops);
> +			/* make sure ops can be seen */
> +			smp_wmb();

This rcu_read_lock(), WRITE_ONCE, and smp_wmb() combo looks very suspicious. 
smc->negotiator_ops is protected by rcu (+refcnt) or lock_sock()?

I am going to stop reviewing here.

> +			if (ops->init)
> +				ops->init(&smc->sk);
> +			ret = 0;
> +		}
> +	}
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +void smc_sock_cleanup_negotiator_ops(struct smc_sock *smc, bool no_more)
> +{
> +	const struct smc_sock_negotiator_ops *ops;
> +
> +	ops = READ_ONCE(smc->negotiator_ops);
> +
> +	/* not all smc sock has negotiator_ops */
> +	if (!ops)
> +		return;
> +
> +	might_sleep();
> +
> +	/* Just ensure data integrity */
> +	WRITE_ONCE(smc->negotiator_ops, NULL);
> +	/* make sure NULL can be seen */
> +	smp_wmb();
> +	/* if the socks may have references to the negotiator ops to be removed.
> +	 * it means that we might need to wait for the readers of ops
> +	 * to complete. It's slow though.
> +	 */
> +	if (unlikely(!no_more))
> +		synchronize_rcu();
> +	if (ops->release)
> +		ops->release(&smc->sk);
> +	bpf_module_put(ops, ops->owner);
> +}
> +
> +void smc_sock_clone_negotiator_ops(struct sock *parent, struct sock *child)
> +{
> +	const struct smc_sock_negotiator_ops *ops;
> +
> +	rcu_read_lock();
> +	ops = READ_ONCE(smc_sk(parent)->negotiator_ops);
> +	if (ops && bpf_try_module_get(ops, ops->owner)) {
> +		smc_sk(child)->negotiator_ops = ops;
> +		if (ops->init)
> +			ops->init(child);
> +	}
> +	rcu_read_unlock();
> +}
> +#endif
> +
>   int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
>   {
>   	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
> @@ -166,6 +280,9 @@ static bool smc_hs_congested(const struct sock *sk)
>   	if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
>   		return true;
>   
> +	if (!smc_sock_should_select_smc(smc))
> +		return true;
> +
>   	return false;
>   }
>   
> @@ -320,6 +437,9 @@ static int smc_release(struct socket *sock)
>   	sock_hold(sk); /* sock_put below */
>   	smc = smc_sk(sk);
>   
> +	/* trigger info gathering if needed.*/
> +	smc_sock_perform_collecting_info(smc, SMC_SOCK_CLOSED_TIMING);
> +
>   	old_state = sk->sk_state;
>   
>   	/* cleanup for a dangling non-blocking connect */
> @@ -356,6 +476,9 @@ static int smc_release(struct socket *sock)
>   
>   static void smc_destruct(struct sock *sk)
>   {
> +	/* cleanup negotiator_ops if set */
> +	smc_sock_cleanup_negotiator_ops(smc_sk(sk), /* no longer used */ true);
> +
>   	if (sk->sk_state != SMC_CLOSED)
>   		return;
>   	if (!sock_flag(sk, SOCK_DEAD))
> @@ -1627,7 +1750,14 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>   	}
>   
>   	smc_copy_sock_settings_to_clc(smc);
> -	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
> +	/* accept out connection as SMC connection */
> +	if (smc_sock_should_select_smc(smc) == SK_PASS) {
> +		tcp_sk(smc->clcsock->sk)->syn_smc = 1;
> +	} else {
> +		tcp_sk(smc->clcsock->sk)->syn_smc = 0;
> +		smc_switch_to_fallback(smc, /* active fallback */ 0);
> +	}
> +
>   	if (smc->connect_nonblock) {
>   		rc = -EALREADY;
>   		goto out;
> @@ -1679,6 +1809,8 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
>   	}
>   	*new_smc = smc_sk(new_sk);
>   
> +	smc_sock_clone_negotiator_ops(lsk, new_sk);
> +
>   	mutex_lock(&lsmc->clcsock_release_lock);
>   	if (lsmc->clcsock)
>   		rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);



Powered by blists - more mailing lists