[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86948347-529b-433a-991d-0b298776db63@linux.dev>
Date: Fri, 17 Jan 2025 15:50:48 -0800
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: "D. Wythe" <alibuda@...ux.alibaba.com>
Cc: kgraul@...ux.ibm.com, wenjia@...ux.ibm.com, jaka@...ux.ibm.com,
ast@...nel.org, daniel@...earbox.net, andrii@...nel.org, pabeni@...hat.com,
song@...nel.org, sdf@...gle.com, haoluo@...gle.com, yhs@...com,
edumazet@...gle.com, john.fastabend@...il.com, kpsingh@...nel.org,
jolsa@...nel.org, guwen@...ux.alibaba.com, kuba@...nel.org,
davem@...emloft.net, netdev@...r.kernel.org, linux-s390@...r.kernel.org,
linux-rdma@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH bpf-next v6 2/5] net/smc: Introduce generic hook smc_ops
On 1/15/25 11:44 PM, D. Wythe wrote:
> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
> index 2fab6456f765..2004241c3045 100644
> --- a/net/smc/smc_sysctl.c
> +++ b/net/smc/smc_sysctl.c
> @@ -18,6 +18,7 @@
> #include "smc_core.h"
> #include "smc_llc.h"
> #include "smc_sysctl.h"
> +#include "smc_ops.h"
>
> static int min_sndbuf = SMC_BUF_MIN_SIZE;
> static int min_rcvbuf = SMC_BUF_MIN_SIZE;
> @@ -30,6 +31,69 @@ static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
> static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
> static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
>
> +#if IS_ENABLED(CONFIG_SMC_OPS)
> +static int smc_net_replace_smc_ops(struct net *net, const char *name)
> +{
> + struct smc_ops *ops = NULL;
> +
> + rcu_read_lock();
> + /* null or empty name ask to clear current ops */
> + if (name && name[0]) {
> + ops = smc_ops_find_by_name(name);
> + if (!ops) {
> + rcu_read_unlock();
> + return -EINVAL;
> + }
> + /* no change, just return */
> + if (ops == rcu_dereference(net->smc.ops)) {
> + rcu_read_unlock();
> + return 0;
> + }
> + }
> + if (!ops || bpf_try_module_get(ops, ops->owner)) {
> + /* xhcg */
typo. I noticed it only because...
> + ops = rcu_replace_pointer(net->smc.ops, ops, true);
... rcu_replace_pointer() does not align with the above xchg comment. From
looking into rcu_replace_pointer, it is not a xchg. It is also not obvious to me
why it is safe to assume "true" here...
> + /* release old ops */
> + if (ops)
> + bpf_module_put(ops, ops->owner);
... together with a put here on the return value of the rcu_replace_pointer.
> + } else if (ops) {
nit. This looks redundant when looking at the "if (!ops || ..." test above
Also a nit, I would move the bpf_try_module_get() immediately after the above
"if (ops == rcu_dereference(net->smc.ops))" test. This should simplify the later
cases.
> + rcu_read_unlock();
> + return -EBUSY;
> + }
> + rcu_read_unlock();
> + return 0;
> +}
> +
> +static int proc_smc_ops(const struct ctl_table *ctl, int write,
> + void *buffer, size_t *lenp, loff_t *ppos)
> +{
> + struct net *net = container_of(ctl->data, struct net, smc.ops);
> + char val[SMC_OPS_NAME_MAX];
> + const struct ctl_table tbl = {
> + .data = val,
> + .maxlen = SMC_OPS_NAME_MAX,
> + };
> + struct smc_ops *ops;
> + int ret;
> +
> + rcu_read_lock();
> + ops = rcu_dereference(net->smc.ops);
> + if (ops)
> + memcpy(val, ops->name, sizeof(ops->name));
> + else
> + val[0] = '\0';
> + rcu_read_unlock();
> +
> + ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
> + if (ret)
> + return ret;
> +
> + if (write)
> + ret = smc_net_replace_smc_ops(net, val);
> + return ret;
> +}
> +#endif /* CONFIG_SMC_OPS */
> +
> static struct ctl_table smc_table[] = {
> {
> .procname = "autocorking_size",
> @@ -99,6 +163,15 @@ static struct ctl_table smc_table[] = {
> .extra1 = SYSCTL_ZERO,
> .extra2 = SYSCTL_ONE,
> },
> +#if IS_ENABLED(CONFIG_SMC_OPS)
> + {
> + .procname = "ops",
> + .data = &init_net.smc.ops,
> + .mode = 0644,
> + .maxlen = SMC_OPS_NAME_MAX,
> + .proc_handler = proc_smc_ops,
> + },
> +#endif /* CONFIG_SMC_OPS */
> };
>
> int __net_init smc_sysctl_net_init(struct net *net)
> @@ -109,6 +182,20 @@ int __net_init smc_sysctl_net_init(struct net *net)
> table = smc_table;
> if (!net_eq(net, &init_net)) {
> int i;
> +#if IS_ENABLED(CONFIG_SMC_OPS)
> + struct smc_ops *ops;
> +
> + rcu_read_lock();
> + ops = rcu_dereference(init_net.smc.ops);
> + if (ops && ops->flags & SMC_OPS_FLAG_INHERITABLE) {
> + if (!bpf_try_module_get(ops, ops->owner)) {
> + rcu_read_unlock();
> + return -EBUSY;
Not sure if it should count as error when the ops is in the process of
un-register-ing. The next smc_sysctl_net_init will have NULL ops and succeed.
Something for you to consider.
Also, it needs an ack from the SMC maintainer for the SMC specific parts like
the sysctl here.
Powered by blists - more mailing lists