lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sun, 2 Jun 2024 12:14:21 +0300
From: Tariq Toukan <ttoukan.linux@...il.com>
To: Joe Damato <jdamato@...tly.com>, linux-kernel@...r.kernel.org,
 netdev@...r.kernel.org
Cc: nalramli@...tly.com, Saeed Mahameed <saeedm@...dia.com>,
 Leon Romanovsky <leon@...nel.org>, "David S. Miller" <davem@...emloft.net>,
 Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
 Paolo Abeni <pabeni@...hat.com>, Richard Cochran <richardcochran@...il.com>,
 "open list:MELLANOX MLX5 core VPI driver" <linux-rdma@...r.kernel.org>,
 Tariq Toukan <tariqt@...dia.com>
Subject: Re: [RFC net-next v3 2/2] net/mlx5e: Add per queue netdev-genl stats



On 29/05/2024 6:16, Joe Damato wrote:
> Add functions to support the netdev-genl per queue stats API.
> 
> ./cli.py --spec netlink/specs/netdev.yaml \
>           --dump qstats-get --json '{"scope": "queue"}'
> 
> ...snip
> 
>   {'ifindex': 7,
>    'queue-id': 62,
>    'queue-type': 'rx',
>    'rx-alloc-fail': 0,
>    'rx-bytes': 105965251,
>    'rx-packets': 179790},
>   {'ifindex': 7,
>    'queue-id': 0,
>    'queue-type': 'tx',
>    'tx-bytes': 9402665,
>    'tx-packets': 17551},
> 
> ...snip
> 
> Also tested with the script tools/testing/selftests/drivers/net/stats.py
> in several scenarios to ensure stats tallying was correct:
> 
> - on boot (default queue counts)
> - adjusting queue count up or down (ethtool -L eth0 combined ...)
> - adding mqprio TCs

Please test also with interface down.

> 
> Signed-off-by: Joe Damato <jdamato@...tly.com>
> ---
>   .../net/ethernet/mellanox/mlx5/core/en_main.c | 132 ++++++++++++++++++
>   1 file changed, 132 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index ce15805ad55a..515c16a88a6c 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -39,6 +39,7 @@
>   #include <linux/debugfs.h>
>   #include <linux/if_bridge.h>
>   #include <linux/filter.h>
> +#include <net/netdev_queues.h>
>   #include <net/page_pool/types.h>
>   #include <net/pkt_sched.h>
>   #include <net/xdp_sock_drv.h>
> @@ -5293,6 +5294,136 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
>   	return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
>   }
>   
> +static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
> +				     struct netdev_queue_stats_rx *stats)
> +{
> +	struct mlx5e_priv *priv = netdev_priv(dev);
> +	struct mlx5e_channel_stats *channel_stats;
> +	struct mlx5e_rq_stats *xskrq_stats;
> +	struct mlx5e_rq_stats *rq_stats;
> +
> +	if (mlx5e_is_uplink_rep(priv))
> +		return;
> +
> +	channel_stats = priv->channel_stats[i];
> +	xskrq_stats = &channel_stats->xskrq;
> +	rq_stats = &channel_stats->rq;
> +
> +	stats->packets = rq_stats->packets + xskrq_stats->packets;
> +	stats->bytes = rq_stats->bytes + xskrq_stats->bytes;
> +	stats->alloc_fail = rq_stats->buff_alloc_err +
> +			    xskrq_stats->buff_alloc_err;
> +}
> +
> +static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
> +				     struct netdev_queue_stats_tx *stats)
> +{
> +	struct mlx5e_priv *priv = netdev_priv(dev);
> +	struct mlx5e_channel_stats *channel_stats;
> +	struct mlx5e_sq_stats *sq_stats;
> +	int ch_ix, tc_ix;
> +
> +	mutex_lock(&priv->state_lock);
> +	txq_ix_to_chtc_ix(&priv->channels.params, i, &ch_ix, &tc_ix);
> +	mutex_unlock(&priv->state_lock);
> +
> +	channel_stats = priv->channel_stats[ch_ix];
> +	sq_stats = &channel_stats->sq[tc_ix];
> +
> +	stats->packets = sq_stats->packets;
> +	stats->bytes = sq_stats->bytes;
> +}
> +
> +static void mlx5e_get_base_stats(struct net_device *dev,
> +				 struct netdev_queue_stats_rx *rx,
> +				 struct netdev_queue_stats_tx *tx)
> +{
> +	struct mlx5e_priv *priv = netdev_priv(dev);
> +	int i, j;
> +
> +	if (!mlx5e_is_uplink_rep(priv)) {
> +		rx->packets = 0;
> +		rx->bytes = 0;
> +		rx->alloc_fail = 0;
> +
> +		/* compute stats for deactivated RX queues
> +		 *
> +		 * if priv->channels.num == 0 the device is down, so compute
> +		 * stats for every queue.
> +		 *
> +		 * otherwise, compute only the queues which have been deactivated.
> +		 */
> +		mutex_lock(&priv->state_lock);
> +		if (priv->channels.num == 0)
> +			i = 0;

This is not consistent with the above implementation of 
mlx5e_get_queue_stats_rx(), which always returns the stats even if the 
channel is down.
This way, you'll double count the down channels.

I think you should always start from priv->channels.params.num_channels.

> +		else
> +			i = priv->channels.params.num_channels;
> +		mutex_unlock(&priv->state_lock);

I understand that you're following the guidelines by taking the lock 
here, I just don't think this improves anything... If channels can be 
modified in between calls to mlx5e_get_base_stats / 
mlx5e_get_queue_stats_rx, then wrapping the priv->channels access with a 
lock can help protect each individual dereference, but does not 
necessarily give a consistent "snapshot" of the stats.

The rtnl_lock should take care of that, as the driver holds it when 
changing the number of channels and updating real_num_rx/tx_queues.

That said, I would cautiously say you can drop the mutex once the 
requested changes above are applied.

> +
> +		for (; i < priv->stats_nch; i++) {
> +			struct netdev_queue_stats_rx rx_i = {0};
> +
> +			mlx5e_get_queue_stats_rx(dev, i, &rx_i);
> +
> +			rx->packets += rx_i.packets;
> +			rx->bytes += rx_i.bytes;
> +			rx->alloc_fail += rx_i.alloc_fail;
> +		}
> +
> +		if (priv->rx_ptp_opened) {
> +			struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
> +
> +			rx->packets += rq_stats->packets;
> +			rx->bytes += rq_stats->bytes;
> +		}
> +	}
> +
> +	tx->packets = 0;
> +	tx->bytes = 0;
> +
> +	mutex_lock(&priv->state_lock);
> +	for (i = 0; i < priv->stats_nch; i++) {
> +		struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
> +
> +		/* while iterating through all channels [0, stats_nch], there
> +		 * are two cases to handle:
> +		 *
> +		 *  1. the channel is available, so sum only the unavailable TCs
> +		 *     [mlx5e_get_dcb_num_tc, max_opened_tc).
> +		 *
> +		 *  2. the channel is unavailable, so sum all TCs [0, max_opened_tc).
> +		 */

I wonder why the local variable isn't called 'tc'?

> +		if (i < priv->channels.params.num_channels) {
> +			j = mlx5e_get_dcb_num_tc(&priv->channels.params);
> +		} else {
> +			j = 0;
> +		}

Remove the braces, or use the ternary operator.

> +
> +		for (; j < priv->max_opened_tc; j++) {
> +			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
> +
> +			tx->packets += sq_stats->packets;
> +			tx->bytes += sq_stats->bytes;
> +		}
> +	}
> +	mutex_unlock(&priv->state_lock);
> +

Same comment regarding dropping the mutex.

> +	if (priv->tx_ptp_opened) {
> +		for (j = 0; j < priv->max_opened_tc; j++) {
> +			struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[j];
> +
> +			tx->packets    += sq_stats->packets;
> +			tx->bytes      += sq_stats->bytes;
> +		}
> +	}
> +}
> +
> +static const struct netdev_stat_ops mlx5e_stat_ops = {
> +	.get_queue_stats_rx     = mlx5e_get_queue_stats_rx,
> +	.get_queue_stats_tx     = mlx5e_get_queue_stats_tx,
> +	.get_base_stats         = mlx5e_get_base_stats,
> +};
> +
>   static void mlx5e_build_nic_netdev(struct net_device *netdev)
>   {
>   	struct mlx5e_priv *priv = netdev_priv(netdev);
> @@ -5310,6 +5441,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
>   
>   	netdev->watchdog_timeo    = 15 * HZ;
>   
> +	netdev->stat_ops          = &mlx5e_stat_ops;
>   	netdev->ethtool_ops	  = &mlx5e_ethtool_ops;
>   
>   	netdev->vlan_features    |= NETIF_F_SG;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ