[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2b79ebe9-4e83-418a-ae40-93024a3fb433@ovn.org>
Date: Mon, 8 Apr 2024 15:18:18 +0200
From: Ilya Maximets <i.maximets@....org>
To: Adrian Moreno <amorenoz@...hat.com>, netdev@...r.kernel.org
Cc: i.maximets@....org, jiri@...nulli.us, xiyou.wangcong@...il.com,
cmi@...dia.com, yotam.gi@...il.com, aconole@...hat.com, echaudro@...hat.com,
horms@...nel.org
Subject: Re: [RFC net-next v2 2/5] net: psample: add multicast filtering on
group_id
[copying my previous reply since this version actually has netdev@ in Cc]
On 4/8/24 14:57, Adrian Moreno wrote:
> Packet samples can come from several places (e.g: different tc sample
> actions), typically using the sample group (PSAMPLE_ATTR_SAMPLE_GROUP)
> to differentiate them.
>
> Likewise, sample consumers that listen on the multicast group may only
> be interested on a single group. However, they are currently forced to
> receive all samples and discard the ones that are not relevant, causing
> unnecessary overhead.
>
> Allow users to filter on the desired group_id by adding a new command
> SAMPLE_FILTER_SET that can be used to pass the desired group id.
> Store this filter on the per-socket private pointer and use it for
> filtering multicasted samples.
>
> Signed-off-by: Adrian Moreno <amorenoz@...hat.com>
> ---
> include/uapi/linux/psample.h | 1 +
> net/psample/psample.c | 127 +++++++++++++++++++++++++++++++++--
> 2 files changed, 122 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
> index e585db5bf2d2..5e0305b1520d 100644
> --- a/include/uapi/linux/psample.h
> +++ b/include/uapi/linux/psample.h
> @@ -28,6 +28,7 @@ enum psample_command {
> PSAMPLE_CMD_GET_GROUP,
> PSAMPLE_CMD_NEW_GROUP,
> PSAMPLE_CMD_DEL_GROUP,
> + PSAMPLE_CMD_SAMPLE_FILTER_SET,
Other commands are named as PSAMPLE_CMD_VERB_NOUN, so this new one
should be PSAMPLE_CMD_SET_FILTER. (The SAMPLE part seems unnecessary.)
Some functions/structures need to be renamed accordingly.
> };
>
> enum psample_tunnel_key_attr {
> diff --git a/net/psample/psample.c b/net/psample/psample.c
> index a5d9b8446f77..a0cef63dfdec 100644
> --- a/net/psample/psample.c
> +++ b/net/psample/psample.c
> @@ -98,13 +98,84 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
> return msg->len;
> }
>
> -static const struct genl_small_ops psample_nl_ops[] = {
> +struct psample_obj_desc {
> + struct rcu_head rcu;
> + u32 group_num;
> + bool group_num_valid;
> +};
> +
> +struct psample_nl_sock_priv {
> + struct psample_obj_desc __rcu *flt;
Can we call it 'filter'? I find it hard to read the code with
this unnecessary abbreviation. Same for the lock below.
> + spinlock_t flt_lock; /* Protects flt. */
> +};
> +
> +static void psample_nl_sock_priv_init(void *priv)
> +{
> + struct psample_nl_sock_priv *sk_priv = priv;
> +
> + spin_lock_init(&sk_priv->flt_lock);
> +}
> +
> +static void psample_nl_sock_priv_destroy(void *priv)
> +{
> + struct psample_nl_sock_priv *sk_priv = priv;
> + struct psample_obj_desc *flt;
> +
> + flt = rcu_dereference_protected(sk_priv->flt, true);
> + kfree_rcu(flt, rcu);
> +}
> +
> +static int psample_nl_sample_filter_set_doit(struct sk_buff *skb,
> + struct genl_info *info)
> +{
> + struct psample_nl_sock_priv *sk_priv;
> + struct nlattr **attrs = info->attrs;
> + struct psample_obj_desc *flt;
> +
> + flt = kzalloc(sizeof(*flt), GFP_KERNEL);
> +
> + if (attrs[PSAMPLE_ATTR_SAMPLE_GROUP]) {
> + flt->group_num = nla_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]);
> + flt->group_num_valid = true;
> + }
> +
> + if (!flt->group_num_valid) {
> + kfree(flt);
Might be better to not allocate it in the first place.
> + flt = NULL;
> + }
> +
> + sk_priv = genl_sk_priv_get(&psample_nl_family, NETLINK_CB(skb).sk);
> + if (IS_ERR(sk_priv)) {
> + kfree(flt);
> + return PTR_ERR(sk_priv);
> + }
> +
> + spin_lock(&sk_priv->flt_lock);
> + flt = rcu_replace_pointer(sk_priv->flt, flt,
> + lockdep_is_held(&sk_priv->flt_lock));
> + spin_unlock(&sk_priv->flt_lock);
> + kfree_rcu(flt, rcu);
> + return 0;
> +}
> +
> +static const struct nla_policy
> + psample_sample_filter_set_policy[PSAMPLE_ATTR_SAMPLE_GROUP + 1] = {
> + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NLA_U32, },
This indentation is confusing, though I'm not sure what's a better way.
> +};
> +
> +static const struct genl_ops psample_nl_ops[] = {
> {
> .cmd = PSAMPLE_CMD_GET_GROUP,
> .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
> .dumpit = psample_nl_cmd_get_group_dumpit,
> /* can be retrieved by unprivileged users */
> - }
> + },
> + {
> + .cmd = PSAMPLE_CMD_SAMPLE_FILTER_SET,
> + .doit = psample_nl_sample_filter_set_doit,
> + .policy = psample_sample_filter_set_policy,
> + .flags = 0,
> + },
> };
>
> static struct genl_family psample_nl_family __ro_after_init = {
> @@ -114,10 +185,13 @@ static struct genl_family psample_nl_family __ro_after_init = {
> .netnsok = true,
> .module = THIS_MODULE,
> .mcgrps = psample_nl_mcgrps,
> - .small_ops = psample_nl_ops,
> - .n_small_ops = ARRAY_SIZE(psample_nl_ops),
> + .ops = psample_nl_ops,
> + .n_ops = ARRAY_SIZE(psample_nl_ops),
> .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1,
> .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps),
> + .sock_priv_size = sizeof(struct psample_nl_sock_priv),
> + .sock_priv_init = psample_nl_sock_priv_init,
> + .sock_priv_destroy = psample_nl_sock_priv_destroy,
> };
>
> static void psample_group_notify(struct psample_group *group,
> @@ -360,6 +434,42 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
> }
> #endif
>
> +static inline void psample_nl_obj_desc_init(struct psample_obj_desc *desc,
> + u32 group_num)
> +{
> + memset(desc, 0, sizeof(*desc));
> + desc->group_num = group_num;
> + desc->group_num_valid = true;
> +}
> +
> +static bool psample_obj_desc_match(struct psample_obj_desc *desc,
> + struct psample_obj_desc *flt)
> +{
> + if (desc->group_num_valid && flt->group_num_valid &&
> + desc->group_num != flt->group_num)
> + return false;
> + return true;
This function returns 'true' if one of the arguments is not valid.
I'd not expect such behavior from a 'match' function.
I understand the intention that psample should deliver every sample
to sockets that do not request filters, but that should not be part
of the 'match' logic, or a more appropriate function name should be
chosen. Also, if the group is not initialized, but the filter is,
it should not match, logically. The validity of the filter and that
of the current sample are not symmetric.
And I'm not really sure if the 'group_num_valid' is actually needed.
Can the NULL pointer be used as an indicator? If so, then maybe
the whole psample_obj_desc structure is not needed as it will
contain a single field.
> +}
> +
> +static int psample_nl_sample_filter(struct sock *dsk, struct sk_buff *skb,
> + void *data)
> +{
> + struct psample_obj_desc *desc = data;
> + struct psample_nl_sock_priv *sk_priv;
> + struct psample_obj_desc *flt;
> + int ret = 0;
> +
> + rcu_read_lock();
> + sk_priv = __genl_sk_priv_get(&psample_nl_family, dsk);
> + if (!IS_ERR_OR_NULL(sk_priv)) {
> + flt = rcu_dereference(sk_priv->flt);
> + if (flt)
> + ret = !psample_obj_desc_match(desc, flt);
> + }
> + rcu_read_unlock();
> + return ret;
> +}
> +
> void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
> u32 sample_rate, const struct psample_metadata *md)
> {
> @@ -370,6 +480,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
> #ifdef CONFIG_INET
> struct ip_tunnel_info *tun_info;
> #endif
> + struct psample_obj_desc desc;
> struct sk_buff *nl_skb;
> int data_len;
> int meta_len;
> @@ -487,8 +598,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
> #endif
>
> genlmsg_end(nl_skb, data);
> - genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
> - PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
> + psample_nl_obj_desc_init(&desc, group->group_num);
> + genlmsg_multicast_netns_filtered(&psample_nl_family,
> + group->net, nl_skb, 0,
> + PSAMPLE_NL_MCGRP_SAMPLE,
> + GFP_ATOMIC, psample_nl_sample_filter,
> + &desc);
>
> return;
> error:
Powered by blists - more mailing lists