Message-ID: <230701b9-c52a-4b59-9969-4cd5a5d697f4@gmail.com>
Date: Tue, 14 May 2024 21:44:37 +0300
From: Tariq Toukan <ttoukan.linux@...il.com>
To: Joe Damato <jdamato@...tly.com>, linux-kernel@...r.kernel.org,
netdev@...r.kernel.org
Cc: zyjzyj2000@...il.com, nalramli@...tly.com,
Saeed Mahameed <saeedm@...dia.com>, Leon Romanovsky <leon@...nel.org>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Richard Cochran <richardcochran@...il.com>,
"open list:MELLANOX MLX5 core VPI driver" <linux-rdma@...r.kernel.org>,
Tariq Toukan <tariqt@...dia.com>
Subject: Re: [PATCH net-next v2 1/1] net/mlx5e: Add per queue netdev-genl
stats
On 10/05/2024 7:17, Joe Damato wrote:
> Add functions to support the netdev-genl per queue stats API.
>
> ./cli.py --spec netlink/specs/netdev.yaml \
> --dump qstats-get --json '{"scope": "queue"}'
>
> ...snip
>
> {'ifindex': 7,
> 'queue-id': 62,
> 'queue-type': 'rx',
> 'rx-alloc-fail': 0,
> 'rx-bytes': 105965251,
> 'rx-packets': 179790},
> {'ifindex': 7,
> 'queue-id': 0,
> 'queue-type': 'tx',
> 'tx-bytes': 9402665,
> 'tx-packets': 17551},
>
> ...snip
>
> Also tested with the script tools/testing/selftests/drivers/net/stats.py
> in several scenarios to ensure stats tallying was correct:
>
> - on boot (default queue counts)
> - adjusting queue count up or down (ethtool -L eth0 combined ...)
> - adding mqprio TCs
>
> Signed-off-by: Joe Damato <jdamato@...tly.com>
> ---
> .../net/ethernet/mellanox/mlx5/core/en_main.c | 144 ++++++++++++++++++
> 1 file changed, 144 insertions(+)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index ffe8919494d5..4a675d8b31b5 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -39,6 +39,7 @@
> #include <linux/debugfs.h>
> #include <linux/if_bridge.h>
> #include <linux/filter.h>
> +#include <net/netdev_queues.h>
> #include <net/page_pool/types.h>
> #include <net/pkt_sched.h>
> #include <net/xdp_sock_drv.h>
> @@ -5282,6 +5283,148 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
> return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
> }
>
> +static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
> + struct netdev_queue_stats_rx *stats)
> +{
> + struct mlx5e_priv *priv = netdev_priv(dev);
> +
> + if (mlx5e_is_uplink_rep(priv))
> + return;
> +
> + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
> + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
> + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
> +
Don't we allow variable declarations only at the beginning of a block?
Is this mixed declarations-and-code style accepted in the networking
subsystem?
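i.e. keep the declarations at the top and do the assignments after the
early return, something like (sketch only, rest of the function as in
your patch):

static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
				     struct netdev_queue_stats_rx *stats)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_channel_stats *channel_stats;
	struct mlx5e_rq_stats *xskrq_stats, *rq_stats;

	if (mlx5e_is_uplink_rep(priv))
		return;

	channel_stats = priv->channel_stats[i];
	xskrq_stats = &channel_stats->xskrq;
	rq_stats = &channel_stats->rq;
	...
}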
> + stats->packets = rq_stats->packets + xskrq_stats->packets;
> + stats->bytes = rq_stats->bytes + xskrq_stats->bytes;
> + stats->alloc_fail = rq_stats->buff_alloc_err +
> + xskrq_stats->buff_alloc_err;
> +}
> +
> +static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
> + struct netdev_queue_stats_tx *stats)
> +{
> + struct mlx5e_priv *priv = netdev_priv(dev);
> + struct net_device *netdev = priv->netdev;
> + struct mlx5e_txqsq *sq;
> + int j;
> +
> + if (mlx5e_is_uplink_rep(priv))
> + return;
> +
> + for (j = 0; j < netdev->num_tx_queues; j++) {
> + sq = priv->txq2sq[j];
There is no SQ instance when the interface is down.
The txq-to-SQ mapping should be a simple arithmetic calculation.
Please expose a proper helper for it and use it both here and in the
SQ create flow.
As written, this also assumes a very involved caller that passes
exactly the right index i of the SQ it is interested in (see the
sketch further below).
> + if (sq->ch_ix == i) {
So you're looking for the first SQ on channel i?
But there might be multiple SQs on channel i...
Also, this SQ might already be included in the base stats.
In addition, i might be too large for a channel index
(num_tx_queues can be 8 * num_channels).
The logic here (mapping from i in num_tx_queues to SQ stats) needs a
careful definition.
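To illustrate what I mean by a simple arithmetic calculation,
something along these lines (completely untested sketch; the helper
name is made up, and it assumes the txq_ix = ch_ix + tc * num_channels
layout of the SQ create flow, so please double check against the
actual mapping there and share the helper with that flow):

/* hypothetical helper, to be shared with the SQ create flow */
static void mlx5e_txq_ix_to_ch_tc(struct mlx5e_params *params, int txq_ix,
				  int *ch_ix, int *tc)
{
	*ch_ix = txq_ix % params->num_channels;
	*tc = txq_ix / params->num_channels;
}

static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
				     struct netdev_queue_stats_tx *stats)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_sq_stats *sq_stats;
	int ch_ix, tc;

	if (mlx5e_is_uplink_rep(priv))
		return;

	mlx5e_txq_ix_to_ch_tc(&priv->channels.params, i, &ch_ix, &tc);

	/* read from the persistent stats structs, not from the SQ
	 * instance, so this also works when the interface is down
	 */
	sq_stats = &priv->channel_stats[ch_ix]->sq[tc];

	stats->packets = sq_stats->packets;
	stats->bytes = sq_stats->bytes;
}

This still leaves open the question of which of these SQs are already
counted in the base stats.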
> + stats->packets = sq->stats->packets;
> + stats->bytes = sq->stats->bytes;
> + return;
> + }
> + }
> +}
> +
> +static void mlx5e_get_base_stats(struct net_device *dev,
> + struct netdev_queue_stats_rx *rx,
> + struct netdev_queue_stats_tx *tx)
> +{
> + struct mlx5e_priv *priv = netdev_priv(dev);
> + int i, j;
> +
> + if (!mlx5e_is_uplink_rep(priv)) {
> + rx->packets = 0;
> + rx->bytes = 0;
> + rx->alloc_fail = 0;
> +
> + /* compute stats for deactivated RX queues
> + *
> + * if priv->channels.num == 0 the device is down, so compute
> + * stats for every queue.
> + *
> + * otherwise, compute only the queues which have been deactivated.
> + */
> + if (priv->channels.num == 0)
> + i = 0;
> + else
> + i = priv->channels.params.num_channels;
> +
> + for (; i < priv->stats_nch; i++) {
> + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
> + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
> + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
> +
> + rx->packets += rq_stats->packets + xskrq_stats->packets;
> + rx->bytes += rq_stats->bytes + xskrq_stats->bytes;
> + rx->alloc_fail += rq_stats->buff_alloc_err +
> + xskrq_stats->buff_alloc_err;
Isn't this equivalent to mlx5e_get_queue_stats_rx(i) ?
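i.e. the loop body could simply call it (sketch):

		for (; i < priv->stats_nch; i++) {
			struct netdev_queue_stats_rx rx_i = {0};

			mlx5e_get_queue_stats_rx(dev, i, &rx_i);

			rx->packets += rx_i.packets;
			rx->bytes += rx_i.bytes;
			rx->alloc_fail += rx_i.alloc_fail;
		}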
> + }
> +
> + if (priv->rx_ptp_opened) {
> + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
> +
> + rx->packets += rq_stats->packets;
> + rx->bytes += rq_stats->bytes;
> + }
> + }
> +
> + tx->packets = 0;
> + tx->bytes = 0;
> +
> + /* three TX cases to handle:
> + *
> + * case 1: priv->channels.num == 0, get the stats for every TC
> + * on every queue.
> + *
> + * case 2: priv->channel.num > 0, so get the stats for every TC on
> + * every deactivated queue.
> + *
> + * case 3: the number of TCs has changed, so get the stats for the
> + * inactive TCs on active TX queues (handled in the second loop
> + * below).
> + */
> + if (priv->channels.num == 0)
> + i = 0;
> + else
> + i = priv->channels.params.num_channels;
> +
All reads/writes to priv->channels must be under the priv->state_lock.
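i.e. take the mutex around the whole accumulation, roughly:

static void mlx5e_get_base_stats(struct net_device *dev,
				 struct netdev_queue_stats_rx *rx,
				 struct netdev_queue_stats_tx *tx)
{
	struct mlx5e_priv *priv = netdev_priv(dev);

	mutex_lock(&priv->state_lock);
	/* ... all accesses to priv->channels / priv->channels.params ... */
	mutex_unlock(&priv->state_lock);
}

(and similarly in the per-queue callbacks if they end up reading
priv->channels)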
> + for (; i < priv->stats_nch; i++) {
> + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
> +
> + for (j = 0; j < priv->max_opened_tc; j++) {
> + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
> +
> + tx->packets += sq_stats->packets;
> + tx->bytes += sq_stats->bytes;
> + }
> + }
> +
> + /* Handle case 3 described above. */
> + for (i = 0; i < priv->channels.params.num_channels; i++) {
> + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
> + u8 dcb_num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
> +
> + for (j = dcb_num_tc; j < priv->max_opened_tc; j++) {
> + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
> +
> + tx->packets += sq_stats->packets;
> + tx->bytes += sq_stats->bytes;
> + }
> + }
> +
> + if (priv->tx_ptp_opened) {
> + for (j = 0; j < priv->max_opened_tc; j++) {
> + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[j];
> +
> + tx->packets += sq_stats->packets;
> + tx->bytes += sq_stats->bytes;
> + }
> + }
> +}
> +
> +static const struct netdev_stat_ops mlx5e_stat_ops = {
> + .get_queue_stats_rx = mlx5e_get_queue_stats_rx,
> + .get_queue_stats_tx = mlx5e_get_queue_stats_tx,
> + .get_base_stats = mlx5e_get_base_stats,
> +};
> +
> static void mlx5e_build_nic_netdev(struct net_device *netdev)
> {
> struct mlx5e_priv *priv = netdev_priv(netdev);
> @@ -5299,6 +5442,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
>
> netdev->watchdog_timeo = 15 * HZ;
>
> + netdev->stat_ops = &mlx5e_stat_ops;
> netdev->ethtool_ops = &mlx5e_ethtool_ops;
>
> netdev->vlan_features |= NETIF_F_SG;