lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-ID: <0e1656dc-b67c-ec65-83a4-6709fb186061@linux.dev> Date: Mon, 15 May 2023 15:52:42 -0700 From: Martin KaFai Lau <martin.lau@...ux.dev> To: "D. Wythe" <alibuda@...ux.alibaba.com> Cc: kuba@...nel.org, davem@...emloft.net, netdev@...r.kernel.org, linux-s390@...r.kernel.org, linux-rdma@...r.kernel.org, bpf@...r.kernel.org, kgraul@...ux.ibm.com, wenjia@...ux.ibm.com, jaka@...ux.ibm.com, ast@...nel.org, daniel@...earbox.net, andrii@...nel.org, pabeni@...hat.com, song@...nel.org, sdf@...gle.com, haoluo@...gle.com, yhs@...com, edumazet@...gle.com, john.fastabend@...il.com, kpsingh@...nel.org, jolsa@...nel.org, guwen@...ux.alibaba.com Subject: Re: [PATCH bpf-next v1 2/5] net/smc: allow smc to negotiate protocols on policies On 5/11/23 11:24 PM, D. Wythe wrote: > From: "D. Wythe" <alibuda@...ux.alibaba.com> > > As we all know, the SMC protocol is not suitable for all scenarios, > especially for short-lived. However, for most applications, they cannot > guarantee that there are no such scenarios at all. Therefore, apps > may need some specific strategies to decide shall we need to use SMC > or not. > > Just like the congestion control implementation in TCP, this patch > provides a generic negotiator implementation. If necessary, > we can provide different protocol negotiation strategies for > apps based on this implementation. > > But most importantly, this patch provides the possibility of > eBPF injection, allowing users to implement their own protocol > negotiation policy in userspace. > > Signed-off-by: D. 
Wythe <alibuda@...ux.alibaba.com> > --- > include/net/smc.h | 32 +++++++++++ > net/Makefile | 1 + > net/smc/Kconfig | 11 ++++ > net/smc/af_smc.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++- > net/smc/smc_negotiator.c | 119 +++++++++++++++++++++++++++++++++++++++++ > net/smc/smc_negotiator.h | 116 ++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 412 insertions(+), 1 deletion(-) > create mode 100644 net/smc/smc_negotiator.c > create mode 100644 net/smc/smc_negotiator.h > > diff --git a/include/net/smc.h b/include/net/smc.h > index 6d076f5..191061c 100644 > --- a/include/net/smc.h > +++ b/include/net/smc.h > @@ -296,6 +296,8 @@ struct smc_sock { /* smc sock container */ > atomic_t queued_smc_hs; /* queued smc handshakes */ > struct inet_connection_sock_af_ops af_ops; > const struct inet_connection_sock_af_ops *ori_af_ops; > + /* protocol negotiator ops */ > + const struct smc_sock_negotiator_ops *negotiator_ops; > /* original af ops */ > int sockopt_defer_accept; > /* sockopt TCP_DEFER_ACCEPT > @@ -316,4 +318,34 @@ struct smc_sock { /* smc sock container */ > */ > }; > > +#ifdef CONFIG_SMC_BPF > +/* BPF struct ops for smc protocol negotiator */ > +struct smc_sock_negotiator_ops { > + > + struct list_head list; > + > + /* ops name */ > + char name[16]; > + /* key for name */ > + u32 key; > + > + /* init with sk */ > + void (*init)(struct sock *sk); > + > + /* release with sk */ > + void (*release)(struct sock *sk); > + > + /* advice for negotiate */ > + int (*negotiate)(struct sock *sk); > + > + /* info gathering timing */ > + void (*collect_info)(struct sock *sk, int timing); > + > + /* module owner */ > + struct module *owner; > +}; > +#else > +struct smc_sock_negotiator_ops {}; > +#endif > + > #endif /* _SMC_H */ > diff --git a/net/Makefile b/net/Makefile > index 4c4dc53..222916a 100644 > --- a/net/Makefile > +++ b/net/Makefile > @@ -52,6 +52,7 @@ obj-$(CONFIG_TIPC) += tipc/ > obj-$(CONFIG_NETLABEL) += netlabel/ > obj-$(CONFIG_IUCV) += iucv/ > 
obj-$(CONFIG_SMC) += smc/ > +obj-$(CONFIG_SMC_BPF) += smc/smc_negotiator.o > obj-$(CONFIG_RFKILL) += rfkill/ > obj-$(CONFIG_NET_9P) += 9p/ > obj-$(CONFIG_CAIF) += caif/ > diff --git a/net/smc/Kconfig b/net/smc/Kconfig > index 1ab3c5a..bdcc9f1 100644 > --- a/net/smc/Kconfig > +++ b/net/smc/Kconfig > @@ -19,3 +19,14 @@ config SMC_DIAG > smcss. > > if unsure, say Y. > + > +config SMC_BPF > + bool "SMC: support eBPF" if SMC so smc_negotiator will always be in the kernel image even if af_smc is compiled as a module? If SMC_BPF needs to support af_smc as a module, proper implementation needs to be added to bpf_struct_ops to support modules first. It is work-in-progress. > + depends on BPF_SYSCALL > + default n > + help > + Supports eBPF to allows user mode participation in SMC's protocol process > + via ebpf programs. Alternatively, obtain information about the SMC socks > + through the ebpf program. > + > + If unsure, say N. > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > index 50c38b6..7406fd4 100644 > --- a/net/smc/af_smc.c > +++ b/net/smc/af_smc.c > @@ -52,6 +52,7 @@ > #include "smc_close.h" > #include "smc_stats.h" > #include "smc_tracepoint.h" > +#include "smc_negotiator.h" > #include "smc_sysctl.h" > > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group > @@ -68,6 +69,119 @@ > static void smc_tcp_listen_work(struct work_struct *); > static void smc_connect_work(struct work_struct *); > > +#ifdef CONFIG_SMC_BPF > + > +/* Check if sock should use smc */ > +int smc_sock_should_select_smc(const struct smc_sock *smc) > +{ > + const struct smc_sock_negotiator_ops *ops; > + int ret; > + > + rcu_read_lock(); > + ops = READ_ONCE(smc->negotiator_ops); > + > + /* No negotiator_ops supply or no negotiate func set, > + * always pass it. > + */ > + if (!ops || !ops->negotiate) { A smc_sock_negotiator_ops without ->negotiate? Is it useful at all to allow the registration in the first place? 
> + rcu_read_unlock(); > + return SK_PASS; > + } > + > + ret = ops->negotiate((struct sock *)&smc->sk); > + rcu_read_unlock(); > + return ret; > +} > + > +void smc_sock_perform_collecting_info(const struct smc_sock *smc, int timing) > +{ > + const struct smc_sock_negotiator_ops *ops; > + > + rcu_read_lock(); > + ops = READ_ONCE(smc->negotiator_ops); > + > + if (!ops || !ops->collect_info) { > + rcu_read_unlock(); > + return; > + } > + > + ops->collect_info((struct sock *)&smc->sk, timing); > + rcu_read_unlock(); > +} > + > +int smc_sock_assign_negotiator_ops(struct smc_sock *smc, const char *name) > +{ > + struct smc_sock_negotiator_ops *ops; > + int ret = -EINVAL; > + > + /* already set */ > + if (READ_ONCE(smc->negotiator_ops)) > + smc_sock_cleanup_negotiator_ops(smc, /* might be still referenced */ false); > + > + /* Just for clear negotiator_ops */ > + if (!name || !strlen(name)) > + return 0; > + > + rcu_read_lock(); > + ops = smc_negotiator_ops_get_by_name(name); > + if (likely(ops)) { > + if (unlikely(!bpf_try_module_get(ops, ops->owner))) { > + ret = -EACCES; > + } else { > + WRITE_ONCE(smc->negotiator_ops, ops); > + /* make sure ops can be seen */ > + smp_wmb(); This rcu_read_lock(), WRITE_ONCE, and smp_wmb() combo looks very suspicious. smc->negotiator_ops is protected by rcu (+refcnt) or lock_sock()? I am going to stop reviewing here. > + if (ops->init) > + ops->init(&smc->sk); > + ret = 0; > + } > + } > + rcu_read_unlock(); > + return ret; > +} > + > +void smc_sock_cleanup_negotiator_ops(struct smc_sock *smc, bool no_more) > +{ > + const struct smc_sock_negotiator_ops *ops; > + > + ops = READ_ONCE(smc->negotiator_ops); > + > + /* not all smc sock has negotiator_ops */ > + if (!ops) > + return; > + > + might_sleep(); > + > + /* Just ensure data integrity */ > + WRITE_ONCE(smc->negotiator_ops, NULL); > + /* make sure NULL can be seen */ > + smp_wmb(); > + /* if the socks may have references to the negotiator ops to be removed. 
> + * it means that we might need to wait for the readers of ops > + * to complete. It's slow though. > + */ > + if (unlikely(!no_more)) > + synchronize_rcu(); > + if (ops->release) > + ops->release(&smc->sk); > + bpf_module_put(ops, ops->owner); > +} > + > +void smc_sock_clone_negotiator_ops(struct sock *parent, struct sock *child) > +{ > + const struct smc_sock_negotiator_ops *ops; > + > + rcu_read_lock(); > + ops = READ_ONCE(smc_sk(parent)->negotiator_ops); > + if (ops && bpf_try_module_get(ops, ops->owner)) { > + smc_sk(child)->negotiator_ops = ops; > + if (ops->init) > + ops->init(child); > + } > + rcu_read_unlock(); > +} > +#endif > + > int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb) > { > struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); > @@ -166,6 +280,9 @@ static bool smc_hs_congested(const struct sock *sk) > if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq)) > return true; > > + if (!smc_sock_should_select_smc(smc)) > + return true; > + > return false; > } > > @@ -320,6 +437,9 @@ static int smc_release(struct socket *sock) > sock_hold(sk); /* sock_put below */ > smc = smc_sk(sk); > > + /* trigger info gathering if needed.*/ > + smc_sock_perform_collecting_info(smc, SMC_SOCK_CLOSED_TIMING); > + > old_state = sk->sk_state; > > /* cleanup for a dangling non-blocking connect */ > @@ -356,6 +476,9 @@ static int smc_release(struct socket *sock) > > static void smc_destruct(struct sock *sk) > { > + /* cleanup negotiator_ops if set */ > + smc_sock_cleanup_negotiator_ops(smc_sk(sk), /* no longer used */ true); > + > if (sk->sk_state != SMC_CLOSED) > return; > if (!sock_flag(sk, SOCK_DEAD)) > @@ -1627,7 +1750,14 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, > } > > smc_copy_sock_settings_to_clc(smc); > - tcp_sk(smc->clcsock->sk)->syn_smc = 1; > + /* accept out connection as SMC connection */ > + if (smc_sock_should_select_smc(smc) == SK_PASS) { > + tcp_sk(smc->clcsock->sk)->syn_smc = 1; > + } else { > + 
tcp_sk(smc->clcsock->sk)->syn_smc = 0; > + smc_switch_to_fallback(smc, /* active fallback */ 0); > + } > + > if (smc->connect_nonblock) { > rc = -EALREADY; > goto out; > @@ -1679,6 +1809,8 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) > } > *new_smc = smc_sk(new_sk); > > + smc_sock_clone_negotiator_ops(lsk, new_sk); > + > mutex_lock(&lsmc->clcsock_release_lock); > if (lsmc->clcsock) > rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
Powered by blists - more mailing lists