lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <143cf81c-d850-43c4-96b4-4fef840703b7@redhat.com>
Date: Wed, 10 Apr 2024 15:32:50 +0200
From: Adrian Moreno <amorenoz@...hat.com>
To: Aaron Conole <aconole@...hat.com>
Cc: Ilya Maximets <i.maximets@....org>, netdev@...r.kernel.org,
 jiri@...nulli.us, xiyou.wangcong@...il.com, cmi@...dia.com,
 yotam.gi@...il.com, echaudro@...hat.com, horms@...nel.org
Subject: Re: [RFC net-next v2 2/5] net: psample: add multicast filtering on
 group_id



On 4/9/24 16:43, Aaron Conole wrote:
> Adrian Moreno <amorenoz@...hat.com> writes:
> 
>> On 4/8/24 15:18, Ilya Maximets wrote:
>>> [copying my previous reply since this version actually has netdev@ in Cc]
>>> On 4/8/24 14:57, Adrian Moreno wrote:
>>>> Packet samples can come from several places (e.g: different tc sample
>>>> actions), typically using the sample group (PSAMPLE_ATTR_SAMPLE_GROUP)
>>>> to differentiate them.
>>>>
>>>> Likewise, sample consumers that listen on the multicast group may only
>>>> be interested in a single group. However, they are currently forced to
>>>> receive all samples and discard the ones that are not relevant, causing
>>>> unnecessary overhead.
>>>>
>>>> Allow users to filter on the desired group_id by adding a new command
>>>> SAMPLE_FILTER_SET that can be used to pass the desired group id.
>>>> Store this filter on the per-socket private pointer and use it for
>>>> filtering multicasted samples.
>>>>
>>>> Signed-off-by: Adrian Moreno <amorenoz@...hat.com>
>>>> ---
>>>>    include/uapi/linux/psample.h |   1 +
>>>>    net/psample/psample.c        | 127 +++++++++++++++++++++++++++++++++--
>>>>    2 files changed, 122 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
>>>> index e585db5bf2d2..5e0305b1520d 100644
>>>> --- a/include/uapi/linux/psample.h
>>>> +++ b/include/uapi/linux/psample.h
>>>> @@ -28,6 +28,7 @@ enum psample_command {
>>>>    	PSAMPLE_CMD_GET_GROUP,
>>>>    	PSAMPLE_CMD_NEW_GROUP,
>>>>    	PSAMPLE_CMD_DEL_GROUP,
>>>> +	PSAMPLE_CMD_SAMPLE_FILTER_SET,
>>> Other commands are named as PSAMPLE_CMD_VERB_NOUN, so this new one
>>> should be PSAMPLE_CMD_SET_FILTER.  (The SAMPLE part seems unnecessary.)
>>> Some functions/structures need to be renamed accordingly.
>>>
>>
>> Sure, I'll rename it when I send the next version.
>>
>>>>    };
>>>>      enum psample_tunnel_key_attr {
>>>> diff --git a/net/psample/psample.c b/net/psample/psample.c
>>>> index a5d9b8446f77..a0cef63dfdec 100644
>>>> --- a/net/psample/psample.c
>>>> +++ b/net/psample/psample.c
>>>> @@ -98,13 +98,84 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
>>>>    	return msg->len;
>>>>    }
>>>>    -static const struct genl_small_ops psample_nl_ops[] = {
>>>> +struct psample_obj_desc {
>>>> +	struct rcu_head rcu;
>>>> +	u32 group_num;
>>>> +	bool group_num_valid;
>>>> +};
>>>> +
>>>> +struct psample_nl_sock_priv {
>>>> +	struct psample_obj_desc __rcu *flt;
>>> Can we call it 'filter'?  I find it hard to read the code with
>>> this unnecessary abbreviation.  Same for the lock below.
>>>
>>
>> Sure.
>>
>>>> +	spinlock_t flt_lock; /* Protects flt. */
>>>> +};
>>>> +
>>>> +static void psample_nl_sock_priv_init(void *priv)
>>>> +{
>>>> +	struct psample_nl_sock_priv *sk_priv = priv;
>>>> +
>>>> +	spin_lock_init(&sk_priv->flt_lock);
>>>> +}
>>>> +
>>>> +static void psample_nl_sock_priv_destroy(void *priv)
>>>> +{
>>>> +	struct psample_nl_sock_priv *sk_priv = priv;
>>>> +	struct psample_obj_desc *flt;
>>>> +
>>>> +	flt = rcu_dereference_protected(sk_priv->flt, true);
>>>> +	kfree_rcu(flt, rcu);
>>>> +}
>>>> +
>>>> +static int psample_nl_sample_filter_set_doit(struct sk_buff *skb,
>>>> +					     struct genl_info *info)
>>>> +{
>>>> +	struct psample_nl_sock_priv *sk_priv;
>>>> +	struct nlattr **attrs = info->attrs;
>>>> +	struct psample_obj_desc *flt;
>>>> +
>>>> +	flt = kzalloc(sizeof(*flt), GFP_KERNEL);
>>>> +
>>>> +	if (attrs[PSAMPLE_ATTR_SAMPLE_GROUP]) {
>>>> +		flt->group_num = nla_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]);
>>>> +		flt->group_num_valid = true;
>>>> +	}
>>>> +
>>>> +	if (!flt->group_num_valid) {
>>>> +		kfree(flt);
>>> Might be better to not allocate it in the first place.
>>>
>>
>> Absolutely.
>>
>>>> +		flt = NULL;
>>>> +	}
>>>> +
>>>> +	sk_priv = genl_sk_priv_get(&psample_nl_family, NETLINK_CB(skb).sk);
>>>> +	if (IS_ERR(sk_priv)) {
>>>> +		kfree(flt);
>>>> +		return PTR_ERR(sk_priv);
>>>> +	}
>>>> +
>>>> +	spin_lock(&sk_priv->flt_lock);
>>>> +	flt = rcu_replace_pointer(sk_priv->flt, flt,
>>>> +				  lockdep_is_held(&sk_priv->flt_lock));
>>>> +	spin_unlock(&sk_priv->flt_lock);
>>>> +	kfree_rcu(flt, rcu);
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static const struct nla_policy
>>>> +	psample_sample_filter_set_policy[PSAMPLE_ATTR_SAMPLE_GROUP + 1] = {
>>>> +	[PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NLA_U32, },
>>> This indentation is confusing, though I'm not sure what's a better
>>> way.
>>>
>>
>> I know! I'll try to move it around and see if it improves things.
>>
>>>> +};
>>>> +
>>>> +static const struct genl_ops psample_nl_ops[] = {
>>>>    	{
>>>>    		.cmd = PSAMPLE_CMD_GET_GROUP,
>>>>    		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
>>>>    		.dumpit = psample_nl_cmd_get_group_dumpit,
>>>>    		/* can be retrieved by unprivileged users */
>>>> -	}
>>>> +	},
>>>> +	{
>>>> +		.cmd		= PSAMPLE_CMD_SAMPLE_FILTER_SET,
>>>> +		.doit		= psample_nl_sample_filter_set_doit,
>>>> +		.policy		= psample_sample_filter_set_policy,
>>>> +		.flags		= 0,
>>>> +	},
>>>>    };
>>>>      static struct genl_family psample_nl_family __ro_after_init = {
>>>> @@ -114,10 +185,13 @@ static struct genl_family psample_nl_family __ro_after_init = {
>>>>    	.netnsok	= true,
>>>>    	.module		= THIS_MODULE,
>>>>    	.mcgrps		= psample_nl_mcgrps,
>>>> -	.small_ops	= psample_nl_ops,
>>>> -	.n_small_ops	= ARRAY_SIZE(psample_nl_ops),
>>>> +	.ops		= psample_nl_ops,
>>>> +	.n_ops		= ARRAY_SIZE(psample_nl_ops),
>>>>    	.resv_start_op	= PSAMPLE_CMD_GET_GROUP + 1,
>>>>    	.n_mcgrps	= ARRAY_SIZE(psample_nl_mcgrps),
>>>> +	.sock_priv_size		= sizeof(struct psample_nl_sock_priv),
>>>> +	.sock_priv_init		= psample_nl_sock_priv_init,
>>>> +	.sock_priv_destroy	= psample_nl_sock_priv_destroy,
>>>>    };
>>>>      static void psample_group_notify(struct psample_group *group,
>>>> @@ -360,6 +434,42 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
>>>>    }
>>>>    #endif
>>>>    +static inline void psample_nl_obj_desc_init(struct
>>>> psample_obj_desc *desc,
>>>> +					    u32 group_num)
>>>> +{
>>>> +	memset(desc, 0, sizeof(*desc));
>>>> +	desc->group_num = group_num;
>>>> +	desc->group_num_valid = true;
>>>> +}
>>>> +
>>>> +static bool psample_obj_desc_match(struct psample_obj_desc *desc,
>>>> +				   struct psample_obj_desc *flt)
>>>> +{
>>>> +	if (desc->group_num_valid && flt->group_num_valid &&
>>>> +	    desc->group_num != flt->group_num)
>>>> +		return false;
>>>> +	return true;
>>> This function returns 'true' if one of the arguments is not valid.
>>> I'd not expect such behavior from a 'match' function.
>>> I understand the intention that psample should sample everything
>>> to sockets that do not request filters, but that should not be part
>>> of the 'match' logic, or more appropriate function name should be
>>> chosen.  Also, if the group is not initialized, but the filter is,
>>> it should not match, logically.  The validity on filter and the
>>> current sample is not symmetric.
>>>
>>
>> The descriptor should always be initialized but I think double
>> checking should be OK as in the context of this particular function,
>> it might not be clear it is.
>>
>>> And I'm not really sure if the 'group_num_valid' is actually needed.
>>> Can the NULL pointer be used as an indicator?  If so, then maybe
>>> the whole psample_obj_desc structure is not needed as it will
>>> contain a single field.
>>
>> If we only filter on group_id, then yes. However, as I was writing
>> this, I thought maybe opening the door to filtering on more fields
>> such as the protocol, in/out interfaces, etc. Now that I read this I
>> understand the current code is confusing: I should have left a comment
>> or mentioned it in the commit message.
> 
> If you want to have such filtering options, does it make sense to
> instead have the listening program send a set of bpf instructions for
> filtering instead?  I think the data should be available at the point
> where simple bpf is attached (SO_ATTACH_BPF to the psample socket, and
> the filter should run as part of the broadcast message IIRC since it
> populates the sk_filter field).
> 

That's a good point. I hope parsing the netlink messages won't be too cumbersome.
So let's limit it to group_ids. How about filtering on a number of group_ids? Is 
that worth it?


>>>
>>>> +}
>>>> +
>>>> +static int psample_nl_sample_filter(struct sock *dsk, struct sk_buff *skb,
>>>> +				    void *data)
>>>> +{
>>>> +	struct psample_obj_desc *desc = data;
>>>> +	struct psample_nl_sock_priv *sk_priv;
>>>> +	struct psample_obj_desc *flt;
>>>> +	int ret = 0;
>>>> +
>>>> +	rcu_read_lock();
>>>> +	sk_priv = __genl_sk_priv_get(&psample_nl_family, dsk);
>>>> +	if (!IS_ERR_OR_NULL(sk_priv)) {
>>>> +		flt = rcu_dereference(sk_priv->flt);
>>>> +		if (flt)
>>>> +			ret = !psample_obj_desc_match(desc, flt);
>>>> +	}
>>>> +	rcu_read_unlock();
>>>> +	return ret;
>>>> +}
>>>> +
>>>>    void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
>>>>    			   u32 sample_rate, const struct psample_metadata *md)
>>>>    {
>>>> @@ -370,6 +480,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
>>>>    #ifdef CONFIG_INET
>>>>    	struct ip_tunnel_info *tun_info;
>>>>    #endif
>>>> +	struct psample_obj_desc desc;
>>>>    	struct sk_buff *nl_skb;
>>>>    	int data_len;
>>>>    	int meta_len;
>>>> @@ -487,8 +598,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
>>>>    #endif
>>>>      	genlmsg_end(nl_skb, data);
>>>> -	genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
>>>> -				PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
>>>> +	psample_nl_obj_desc_init(&desc, group->group_num);
>>>> +	genlmsg_multicast_netns_filtered(&psample_nl_family,
>>>> +					 group->net, nl_skb, 0,
>>>> +					 PSAMPLE_NL_MCGRP_SAMPLE,
>>>> +					 GFP_ATOMIC, psample_nl_sample_filter,
>>>> +					 &desc);
>>>>      	return;
>>>>    error:
>>>
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ