[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <151b85fe-72a2-4eb4-9aef-4e3b13b1c8ff@redhat.com>
Date: Tue, 15 Jul 2025 12:05:52 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: Matt Johnston <matt@...econstruct.com.au>,
Jeremy Kerr <jk@...econstruct.com.au>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Simon Horman <horms@...nel.org>
Cc: netdev@...r.kernel.org
Subject: Re: [PATCH net-next v4 5/8] net: mctp: Use hashtable for binds
On 7/10/25 10:55 AM, Matt Johnston wrote:
> Ensure that a specific EID (remote or local) bind will match in
> preference to a MCTP_ADDR_ANY bind.
>
> This adds infrastructure for binding a socket to receive messages from a
> specific remote peer address, a future commit will expose an API for
> this.
>
> Signed-off-by: Matt Johnston <matt@...econstruct.com.au>
>
> ---
> v2:
> - Use DECLARE_HASHTABLE
> - Fix long lines
> ---
> include/net/netns/mctp.h | 20 +++++++++---
> net/mctp/af_mctp.c | 11 ++++---
> net/mctp/route.c | 81 ++++++++++++++++++++++++++++++++++++++----------
> 3 files changed, 87 insertions(+), 25 deletions(-)
>
> diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
> index 1db8f9aaddb4b96f4803df9f30a762f5f88d7f7f..89555f90b97b297e50a571b26c5232b824909da7 100644
> --- a/include/net/netns/mctp.h
> +++ b/include/net/netns/mctp.h
> @@ -6,19 +6,25 @@
> #ifndef __NETNS_MCTP_H__
> #define __NETNS_MCTP_H__
>
> +#include <linux/hash.h>
> +#include <linux/hashtable.h>
> #include <linux/mutex.h>
> #include <linux/types.h>
>
> +#define MCTP_BINDS_BITS 7
> +
> struct netns_mctp {
> /* Only updated under RTNL, entries freed via RCU */
> struct list_head routes;
>
> - /* Bound sockets: list of sockets bound by type.
> - * This list is updated from non-atomic contexts (under bind_lock),
> - * and read (under rcu) in packet rx
> + /* Bound sockets: hash table of sockets, keyed by
> + * (type, src_eid, dest_eid).
> + * Specific src_eid/dest_eid entries also have an entry for
> + * MCTP_ADDR_ANY. This list is updated from non-atomic contexts
> + * (under bind_lock), and read (under rcu) in packet rx.
> */
> struct mutex bind_lock;
> - struct hlist_head binds;
> + DECLARE_HASHTABLE(binds, MCTP_BINDS_BITS);
Note that I comment on patch 2/8 before actually looking at this patch.
As a possible follow-up I suggest a dynamically allocating the hash
table at first bind time.
>
> /* tag allocations. This list is read and updated from atomic contexts,
> * but elements are free()ed after a RCU grace-period
> @@ -34,4 +40,10 @@ struct netns_mctp {
> struct list_head neighbours;
> };
>
> +static inline u32 mctp_bind_hash(u8 type, u8 local_addr, u8 peer_addr)
> +{
> + return hash_32(type | (u32)local_addr << 8 | (u32)peer_addr << 16,
> + MCTP_BINDS_BITS);
> +}
> +
> #endif /* __NETNS_MCTP_H__ */
> diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
> index 20edaf840a607700c04b740708763fbd02a2df47..16341de5cf2893bbc04a8c05a038c30be6570296 100644
> --- a/net/mctp/af_mctp.c
> +++ b/net/mctp/af_mctp.c
> @@ -626,17 +626,17 @@ static int mctp_sk_hash(struct sock *sk)
> struct net *net = sock_net(sk);
> struct sock *existing;
> struct mctp_sock *msk;
> + u32 hash;
> int rc;
>
> msk = container_of(sk, struct mctp_sock, sk);
>
> - /* Bind lookup runs under RCU, remain live during that. */
> - sock_set_flag(sk, SOCK_RCU_FREE);
> + hash = mctp_bind_hash(msk->bind_type, msk->bind_addr, MCTP_ADDR_ANY);
>
> mutex_lock(&net->mctp.bind_lock);
>
> /* Prevent duplicate binds. */
> - sk_for_each(existing, &net->mctp.binds) {
> + sk_for_each(existing, &net->mctp.binds[hash]) {
> struct mctp_sock *mex =
> container_of(existing, struct mctp_sock, sk);
>
> @@ -648,7 +648,10 @@ static int mctp_sk_hash(struct sock *sk)
> }
> }
>
> - sk_add_node_rcu(sk, &net->mctp.binds);
> + /* Bind lookup runs under RCU, remain live during that. */
> + sock_set_flag(sk, SOCK_RCU_FREE);
> +
> + sk_add_node_rcu(sk, &net->mctp.binds[hash]);
> rc = 0;
>
> out:
> diff --git a/net/mctp/route.c b/net/mctp/route.c
> index a20d6b11d4186b55cab9d76e367169ea712553c7..69cfb0e6c545c2b44e5defdfac4e602c4f0265b1 100644
> --- a/net/mctp/route.c
> +++ b/net/mctp/route.c
> @@ -40,14 +40,45 @@ static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb)
> return 0;
> }
>
> -static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
> +static struct mctp_sock *mctp_lookup_bind_details(struct net *net,
> + struct sk_buff *skb,
> + u8 type, u8 dest,
> + u8 src, bool allow_net_any)
> {
> struct mctp_skb_cb *cb = mctp_cb(skb);
> - struct mctp_hdr *mh;
> struct sock *sk;
> - u8 type;
> + u8 hash;
>
> - WARN_ON(!rcu_read_lock_held());
> + WARN_ON_ONCE(!rcu_read_lock_held());
> +
> + hash = mctp_bind_hash(type, dest, src);
> +
> + sk_for_each_rcu(sk, &net->mctp.binds[hash]) {
> + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
> +
> + if (!allow_net_any && msk->bind_net == MCTP_NET_ANY)
> + continue;
> +
> + if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
> + continue;
> +
> + if (msk->bind_type != type)
> + continue;
> +
> + if (!mctp_address_matches(msk->bind_addr, dest))
> + continue;
> +
> + return msk;
> + }
> +
> + return NULL;
> +}
> +
> +static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
> +{
> + struct mctp_sock *msk;
> + struct mctp_hdr *mh;
> + u8 type;
>
> /* TODO: look up in skb->cb? */
> mh = mctp_hdr(skb);
> @@ -57,20 +88,36 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
>
> type = (*(u8 *)skb->data) & 0x7f;
>
> - sk_for_each_rcu(sk, &net->mctp.binds) {
> - struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
> -
> - if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
> - continue;
> -
> - if (msk->bind_type != type)
> - continue;
> -
> - if (!mctp_address_matches(msk->bind_addr, mh->dest))
> - continue;
> + /* Look for binds in order of widening scope. A given destination or
> + * source address also implies matching on a particular network.
> + *
> + * - Matching destination and source
> + * - Matching destination
> + * - Matching source
> + * - Matching network, any address
> + * - Any network or address
> + */
Note for a possible follow-up: a more idiomatic approach uses a
compute_score() function that respect the above priority and require a
single hash traversal, see, i.e. net/ipv4/udp.c
/P
Powered by blists - more mailing lists