[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221123153314.483642-1-alexandr.lobakin@intel.com>
Date: Wed, 23 Nov 2022 16:33:14 +0100
From: Alexander Lobakin <alexandr.lobakin@...el.com>
To: Nikolay Borisov <nikolay.borisov@...tuozzo.com>
Cc: Alexander Lobakin <alexandr.lobakin@...el.com>,
nhorman@...driver.com, davem@...emloft.net, kuba@...nel.org,
pabeni@...hat.com, netdev@...r.kernel.org, kernel@...tuozzo.com
Subject: Re: [PATCH net-next v2 1/3] drop_monitor: Implement namespace filtering/reporting for software drops
From: Nikolay Borisov <nikolay.borisov@...tuozzo.com>
Date: Wed, 23 Nov 2022 16:28:15 +0200
> On hosts running multiple containers it's helpful to be able to see
> in which net namespace a particular drop occured. Additionally, it's
> also useful to limit drop point filtering to a single namespace,
> especially for hosts which are dropping skb's at a high rate.
>
> Signed-off-by: Nikolay Borisov <nikolay.borisov@...tuozzo.com>
> ---
> include/uapi/linux/net_dropmon.h | 2 ++
> net/core/drop_monitor.c | 36 ++++++++++++++++++++++++++++++--
> 2 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
> index 84f622a66a7a..81eb2bd184e8 100644
> --- a/include/uapi/linux/net_dropmon.h
> +++ b/include/uapi/linux/net_dropmon.h
> @@ -8,6 +8,7 @@
> struct net_dm_drop_point {
> __u8 pc[8];
> __u32 count;
> + __u32 ns_id;
> };
>
> #define is_drop_point_hw(x) do {\
> @@ -94,6 +95,7 @@ enum net_dm_attr {
> NET_DM_ATTR_HW_DROPS, /* flag */
> NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */
> NET_DM_ATTR_REASON, /* string */
> + NET_DM_ATTR_NS, /* u32 */
>
> __NET_DM_ATTR_MAX,
> NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
> diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
> index f084a4a6b7ab..680f54d21f38 100644
> --- a/net/core/drop_monitor.c
> +++ b/net/core/drop_monitor.c
> @@ -103,6 +103,7 @@ static unsigned long dm_hw_check_delta = 2*HZ;
> static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
> static u32 net_dm_trunc_len;
> static u32 net_dm_queue_len = 1000;
> +static u32 net_dm_ns;
>
> struct net_dm_alert_ops {
> void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
> @@ -210,6 +211,19 @@ static void sched_send_work(struct timer_list *t)
> schedule_work(&data->dm_alert_work);
> }
>
> +static bool drop_point_matches(struct net_dm_drop_point *point, void *location,
> + unsigned long ns_id)
> +{
> + if (net_dm_ns && point->ns_id == net_dm_ns &&
> + !memcmp(&location, &point->pc, sizeof(void *)))
^^^^^^^^^^^^^^
Nit: sizeof(location)?
> + return true;
> + else if (net_dm_ns == 0 && point->ns_id == ns_id &&
^^^^^^^^^^^^^^
Just `!net_dm_ns` is preferred.
> + !memcmp(&location, &point->pc, sizeof(void *)))
> + return true;
> + else
> + return false;
I think the duplicated memcmp() condition can be avoided by OR-ing the
two namespace checks: `(net_dm_ns && ...) || (!net_dm_ns && ...)`. Then
the explicit true/false returns become redundant:
return ((net_dm_ns && point->ns_id == net_dm_ns) ||
(!net_dm_ns && point->ns_id == ns_id)) &&
!memcmp(&location, &point->pc, sizeof(location));
> +}
> +
> static void trace_drop_common(struct sk_buff *skb, void *location)
> {
> struct net_dm_alert_msg *msg;
> @@ -219,7 +233,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
> int i;
> struct sk_buff *dskb;
> struct per_cpu_dm_data *data;
> - unsigned long flags;
> + unsigned long flags, ns_id = 0;
Extending this line breaks the RCT (reverse Christmas tree) declaration
order. Yeah, it's already broken a line above, but let's not introduce
more style issues %)
> +
> + if (skb->dev && net_dm_ns &&
It's faster to test net_dm_ns first and then skb->dev. The former is a
static variable in the BSS and the latter is dynamic. Plus the former will
be zero much more often than the latter, so the check will usually
short-circuit without touching the skb.
> + dev_net(skb->dev)->ns.inum != net_dm_ns)
> + return;
>
> local_irq_save(flags);
> data = this_cpu_ptr(&dm_cpu_data);
> @@ -233,8 +251,10 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
> nla = genlmsg_data(nlmsg_data(nlh));
> msg = nla_data(nla);
> point = msg->points;
> + if (skb->dev)
> + ns_id = dev_net(skb->dev)->ns.inum;
> for (i = 0; i < msg->entries; i++) {
> - if (!memcmp(&location, &point->pc, sizeof(void *))) {
> + if (drop_point_matches(point, location, ns_id)) {
> point->count++;
> goto out;
> }
> @@ -249,6 +269,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
> nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
> memcpy(point->pc, &location, sizeof(void *));
> point->count = 1;
> + point->ns_id = ns_id;
> msg->entries++;
>
> if (!timer_pending(&data->send_timer)) {
> @@ -1283,6 +1304,14 @@ static void net_dm_trunc_len_set(struct genl_info *info)
> net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
> }
>
> +static void net_dm_ns_set(struct genl_info *info)
> +{
> + if (!info->attrs[NET_DM_ATTR_NS])
> + return;
> +
> + net_dm_ns = nla_get_u32(info->attrs[NET_DM_ATTR_NS]);
So, if I got it correctly, this can limit the scope to only one netns.
Isn't that too inflexible? What about a whitelist or blacklist of NSes
to filter in or filter out?
> +}
> +
> static void net_dm_queue_len_set(struct genl_info *info)
> {
> if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
> @@ -1310,6 +1339,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
>
> net_dm_queue_len_set(info);
>
> + net_dm_ns_set(info);
> +
> return 0;
> }
>
> @@ -1589,6 +1620,7 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
> [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
> [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
> [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
> + [NET_DM_ATTR_NS] = { .type = NLA_U32 },
> [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG },
> [NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG },
> };
> --
> 2.34.1
Thanks,
Olek
Powered by blists - more mailing lists