lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-ID: <05e949b6-030c-e51f-bdf1-7f159a5cbb55@mellanox.com> Date: Mon, 21 Jan 2019 11:32:07 +0000 From: Eran Ben Elisha <eranbe@...lanox.com> To: Jiri Pirko <jiri@...nulli.us> CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, Jiri Pirko <jiri@...lanox.com>, "David S. Miller" <davem@...emloft.net>, Ariel Almog <ariela@...lanox.com>, Aya Levin <ayal@...lanox.com>, Moshe Shemesh <moshe@...lanox.com> Subject: Re: [PATCH net-next v2 09/11] net/mlx5e: Add TX reporter support On 1/20/2019 1:06 PM, Jiri Pirko wrote: > Thu, Jan 17, 2019 at 10:59:18PM CET, eranbe@...lanox.com wrote: >> Add mlx5e tx reporter to devlink health reporters. This reporter will be >> responsible for diagnosing, reporting and recovering of TX errors. >> This patch declares the TX reporter operations and allocate it using the >> devlink health API. Currently, this reporter supports reporting and >> recovering from send error CQE only. In addition, it adds diagnose >> information for the open SQs. >> >> For a local SQ recover (due to driver error report), in case of SQ recover >> failure, the recover operation will be considered as a failure. >> For a full TX recover, an attempt to close and open the channels will be >> done. If this one passed successfully, it will be considered as a >> successful recover. >> >> The SQ recover from error CQE flow is not a new feature in the driver, >> this patch re-organize the functions and adapt them for the devlink >> health API. For this purpose, move code from en_main.c to a new file >> named reporter_tx.c. >> >> Signed-off-by: Eran Ben Elisha <eranbe@...lanox.com> >> Reviewed-by: Saeed Mahameed <saeedm@...lanox.com> >> --- >> .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- >> drivers/net/ethernet/mellanox/mlx5/core/en.h | 18 +- >> .../ethernet/mellanox/mlx5/core/en/reporter.h | 14 + >> .../mellanox/mlx5/core/en/reporter_tx.c | 321 ++++++++++++++++++ >> .../net/ethernet/mellanox/mlx5/core/en_main.c | 135 +------- >> .../net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- >> 6 files changed, 367 insertions(+), 125 deletions(-) >> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h >> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c >> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile >> index 9de9abacf7f6..6bb2a860b15b 100644 >> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile >> @@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ >> # >> mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ >> en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ >> - en_selftest.o en/port.o en/monitor_stats.o >> + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o >> >> # >> # Netdev extra >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h >> index 8fa8fdd30b85..27e276c9bf84 100644 >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h >> @@ -388,10 +388,7 @@ struct mlx5e_txqsq { >> struct mlx5e_channel *channel; >> int txq_ix; >> u32 rate_limit; >> - struct mlx5e_txqsq_recover { >> - struct work_struct recover_work; >> - u64 last_recover; >> - } recover; >> + struct work_struct recover_work; >> } ____cacheline_aligned_in_smp; >> >> struct mlx5e_dma_info { >> @@ -682,6 +679,13 @@ struct mlx5e_rss_params { >> u8 hfunc; >> }; >> >> +struct mlx5e_modify_sq_param { >> + int curr_state; >> + int next_state; >> + int rl_update; >> + int rl_index; >> +}; >> + >> struct mlx5e_priv { >> /* priv data path fields - start */ >> struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; >> @@ -737,6 +741,7 @@ struct mlx5e_priv { >> #ifdef CONFIG_MLX5_EN_TLS >> struct mlx5e_tls *tls; >> #endif >> + struct devlink_health_reporter *tx_reporter; >> }; >> >> struct mlx5e_profile { >> @@ -866,6 +871,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); >> void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, >> struct mlx5e_params *params); >> >> +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, >> + struct mlx5e_modify_sq_param *p); >> +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); >> +void mlx5e_tx_disable_queue(struct netdev_queue *txq); >> + >> static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) >> { >> return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h >> new file mode 100644 >> index 000000000000..74083e120ab3 >> --- /dev/null >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h >> @@ -0,0 +1,14 @@ >> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ >> +/* Copyright (c) 2018 Mellanox Technologies. */ >> + >> +#ifndef __MLX5E_EN_REPORTER_H >> +#define __MLX5E_EN_REPORTER_H >> + >> +#include <linux/mlx5/driver.h> >> +#include "en.h" >> + >> +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); >> +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); >> +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); >> + >> +#endif >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c >> new file mode 100644 >> index 000000000000..9800df4909c2 >> --- /dev/null >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c >> @@ -0,0 +1,321 @@ >> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ >> +/* Copyright (c) 2018 Mellanox Technologies. */ >> + >> +#include <net/devlink.h> >> +#include "reporter.h" >> +#include "lib/eq.h" >> + >> +#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 > > Some made-up number. I don't like how this whole thing is do. I will > comment on it in the devlink part. > > > [...] > > >> +static int >> +mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer, >> + u32 sqn, u8 state, u8 stopped) >> +{ >> + int err, i; >> + int nest = 0; >> + char name[20]; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + sprintf(name, "SQ 0x%x", sqn); > > No. The whole idea of having the message build up with nested attributes > (json-like) was to avoid things like this. No sprintfs please. If you > want to do sprintf, most of the time you are doing something wrong. I wanted that each SQ object will have a unique name. But I can merge the sqn into its attributes instead. > > >> + err = devlink_health_buffer_put_object_name(buffer, name); >> + if (err) >> + goto buffer_error; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + err = devlink_health_buffer_put_object_name(buffer, "HW state"); >> + if (err) >> + goto buffer_error; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); > > How could you put an object name and don't start nesting? It looks > implicit to me. I will add some helper functions that you could review. Just keep in mind the implicit nest start must come with implicit nest end, so it won't fit into every use... > > > >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + err = devlink_health_buffer_put_value_u8(buffer, state); >> + if (err) >> + goto buffer_error; >> + >> + devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */ >> + nest--; >> + >> + devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */ >> + nest--; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); >> + if (err) >> + goto buffer_error; >> + nest++; >> + >> + err = devlink_health_buffer_put_object_name(buffer, "stopped"); >> + if (err) >> + goto buffer_error; >> + >> + err = devlink_health_buffer_nest_start(buffer, >> + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); >> + if (err) >> + goto buffer_error; >> + nest++; > > What's up with this nest++ nest--? > When you open a nest, you should have an error path inten to close the > nest and jump there in case of an error. This is hard to read and > understand. it was in order to have the loop for nest_end. I removed it. > > > >> + >> + err = devlink_health_buffer_put_value_u8(buffer, stopped); >> + if (err) >> + goto buffer_error; >> + >> + for (i = 0; i < nest; i++) >> + devlink_health_buffer_nest_end(buffer); >> + >> + return 0; >> + >> +buffer_error: >> + for (i = 0; i < nest; i++) >> + devlink_health_buffer_nest_cancel(buffer); >> + return err; >> +} >> + >> +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, >> + struct devlink_health_buffer **buffers_array, >> + unsigned int buffer_size, >> + unsigned int num_buffers) >> +{ >> + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); >> + unsigned int buff = 0; >> + int i = 0, err = 0; >> + >> + if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN) >> + return -ENOMEM; >> + >> + mutex_lock(&priv->state_lock); >> + >> + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { >> + mutex_unlock(&priv->state_lock); >> + return 0; >> + } >> + >> + while (i < priv->channels.num * priv->channels.params.num_tc) { >> + struct mlx5e_txqsq *sq = priv->txq2sq[i]; >> + u8 state; >> + >> + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); >> + if (err) >> + break; >> + >> + err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff], >> + sq->sqn, state, >> + netif_xmit_stopped(sq->txq)); >> + if (err) { >> + if (++buff == num_buffers) >> + break; >> + } else { >> + i++; >> + } >> + } >> + >> + mutex_unlock(&priv->state_lock); >> + return err; >> +} >> + >> +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { >> + .name = "TX", >> + .recover = mlx5e_tx_reporter_recover, >> + .diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC * >> + MLX5E_TX_REPORTER_PER_SQ_MAX_LEN, >> + .diagnose = mlx5e_tx_reporter_diagnose, >> + .dump_size = 0, >> + .dump = NULL, > > Don't initialize 0 and NULL in static const. Ack. > > [...] >
Powered by blists - more mailing lists