[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cdc54dc5c21cef5eafe563965e881e23a00191e9.camel@mellanox.com>
Date: Mon, 29 Apr 2019 18:32:45 +0000
From: Saeed Mahameed <saeedm@...lanox.com>
To: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Aya Levin <ayal@...lanox.com>
CC: Eran Ben Elisha <eranbe@...lanox.com>,
Jiri Pirko <jiri@...lanox.com>
Subject: Re: [PATCH net-next RFC] Dump SW SQ context as part of tx reporter
On Mon, 2019-04-29 at 17:17 +0300, Aya Levin wrote:
> TX reporter reports an error on two scenarios:
> - TX timeout on a specific tx queue
> - TX completion error on a specific send queue
> Prior to this patch, no dump data was supported by the tx reporter.
> This
> patch adds support for SW data dump of the related SQ context. The
> dump
> is simply the SQ's raw memory snapshot taken right after the error
> was
> reported, before any recovery procedure was launched. With this
> approach, no maintenance is needed as the driver fetch the actual
> data
> according to the layout on which the SQ was compiled with. By
> providing
> a SW context, one can easily debug error on a given SQ.
>
> In order to offline translate the raw memory into a human readable
> format, the user can use some out-of-kernel scripts which receives as
> an
> input the following:
> - Object raw memory
> - Driver object compiled with debug info (can be taken/generated at
> any time from the machine)
> - Object name
>
> An example of such script output can be seen below.
> Note: the script is not offered as part of this patch as it do not
> belong to the kernel, I just described it in order to grasp the
> general
> idea of how/what can be fetched from SW dump via devlink health.
>
What was the script? Given a raw dump, how do you find which version it
is, and the object name?
Did you use any well-known raw debug format, like DWARF?
> The output of the SW dump can be extracted by devlink health command:
> $ sudo devlink health dump show pci/0000:00:0b.0 reporter tx.
> mlx5e_txqsq: sqn: 6336
> memory:
> 00 00 00 00 00 00 00 00
> 01 00 00 00 00 00 00 00
> 00 00 00 00 00 00 00 00
> 45 f4 88 cb 09 00 00 00
> 00 00 00 00 00 00 00 00
> 00 00 00 00 00 00 00 00
> c0 ff ff ff 1f 00 00 00
> f8 18 1e 89 81 88 ff ff
> ...
>
> script output below, with struct members names and actual values:
>
> struct mlx5e_txqsq {
> short unsigned int cc 0x5 ;
> unsigned int dma_fifo_cc 0x5 ;
> struct net_dim {
> unsigned char state 0x1 ;
> struct net_dim_stats {
> int ppms 0x0 ;
> int bpms 0x0 ;
> int epms 0x0 ;
> } prev_stats;
> struct net_dim_sample {
> long long int time 0x90766ef9d ;
> unsigned int pkt_ctr 0x0 ;
> unsigned int byte_ctr 0x0 ;
> short unsigned int event_ctr 0x0 ;
> } start_sample;
> struct work_struct {
> struct {
> long int counter 0x1fffffffc0 ;
> } data;
> struct list_head {
> struct list_head * next 0xffff8881b0
> 8998f8 ;
> struct list_head * prev 0xffff8881b0
> 8998f8 ;
> } entry;
> void (*func)(struct work_struct *) 0xff
> ffffffa02d0e30 ;
> } work;
> unsigned char profile_ix 0x60 ;
> unsigned char mode 0x72 ;
> unsigned char tune_state 0x35 ;
> unsigned char steps_right 0xa0 ;
> unsigned char steps_left 0xff ;
> unsigned char tired 0xff ;
> } dim;
> short unsigned int pc 0x0 ;
> unsigned int dma_fifo_pc 0x0 ;
> struct mlx5e_cq {
> struct mlx5_cqwq {
> struct mlx5_frag_buf_ctrl {
> struct mlx5_buf_list * frags 0x50
> 0000005 ;
> unsigned int sz_m1 0x0 ;
> short unsigned int frag_sz_m1 0x0
> ;
> short unsigned int strides_offset
> 0x0 ;
> unsigned char log_sz 0x0 ;
> unsigned char log_stride 0x0 ;
> unsigned char log_frag_strides 0x0
> ;
> } fbc;
> __be32 * db 0x0 ;
> unsigned int cc 0x0 ;
> } wq;
> short unsigned int event_ctr 0x0 ;
> struct napi_struct * napi 0x0 ;
> struct mlx5_core_cq {
> unsigned int cqn 0x0 ;
> int cqe_sz 0x0 ;
> __be32 * set_ci_db 0xffff8881b1aa4988 ;
> __be32 * arm_db 0x3f000003ff ;
> struct mlx5_uars_page * uar 0x6060a ;
> struct refcount_struct {
> struct {
> int counter 0xa1814500 ;
> } refs;
> } refcount;
> struct completion {
> unsigned int done 0x5 ;
> struct wait_queue_head {
> struct spinlock {
> union {
> struct raw_spi
> nlock {
> struct
> qspinlock {
>
> union {
>
> struct {
>
> int
> counter 0x5 ;
>
> } val;
>
> struct {
>
> unsigned
> char locked 0x5 ;
>
> unsigned
> char pending 0x0 ;
>
> } ;
>
> struct {
>
> short unsigned
> int locked_pending 0x5 ;
>
> short unsigned
> int tail 0x0 ;
>
> } ;
>
> } ;
> }
> raw_lock;
> } rlock;
> } ;
> } lock;
> struct list_head {
> struct list_head *
> next 0xffff8881b089bb88 ;
> struct list_head *
> prev 0x4000000c0a ;
> } head;
> } wait;
> } free;
> unsigned int vector 0xa1814500 ;
> unsigned int irqn 0xffff8881 ;
> void (*comp)(struct mlx5_core_cq *)
> 0xffff8881a1814504 ;
> void (*event)(struct mlx5_core_cq *, enum
> mlx5_event) 0xffff8881a2cdea08 ;
> unsigned int cons_index 0x1 ;
> unsigned int arm_sn 0x0 ;
> struct mlx5_rsc_debug * dbg 0x0 ;
> int pid 0x0 ;
> struct {
> struct list_head {
> struct list_head * next 0xff
> ffffff ;
> struct list_head * prev 0xff
> ffffffffffffff ;
> } list;
> void (*comp)(struct mlx5_core_cq *)
> 0xffffffffa0356940 ;
> void * priv 0x0 ;
> } tasklet_ctx;
> int reset_notify_added 0x0 ;
> struct list_head {
> struct list_head * next 0xffffffffa0
> 300700 ;
> struct list_head * prev 0xd ;
> } reset_notify;
> struct mlx5_eq_comp * eq 0x0 ;
> short unsigned int uid 0x9a70 ;
> } mcq;
> struct mlx5e_channel * channel 0xffff8881b0899a70 ;
> struct mlx5_core_dev * mdev 0x4800000001 ;
> struct mlx5_wq_ctrl {
> struct mlx5_core_dev * mdev 0xffffffffa0
> 2d5350 ;
> struct mlx5_frag_buf {
> struct mlx5_buf_list * frags 0xff
> ffffffa02d5460 ;
> int npages 0x0 ;
> int size 0x5 ;
> unsigned char page_shift 0x8 ;
> } buf;
> struct mlx5_db {
> __be32 * db 0x1c6 ;
> union {
> struct mlx5_db_pgdir * pgdir
> 0x0 ;
> struct mlx5_ib_user_db_page *
> user_page 0x0 ;
> } u;
> long long unsigned int dma 0xff
> ff8881b0899ab0 ;
> int index 0x0 ;
> } db;
> } wq_ctrl;
> } cq;
> struct mlx5_wq_cyc {
> struct mlx5_frag_buf_ctrl {
> struct mlx5_buf_list * frags 0xffff8881a7
> 600160 ;
> unsigned int sz_m1 0xa7600160 ;
> short unsigned int frag_sz_m1 0x8881 ;
> short unsigned int strides_offset 0xff
> ff ;
> unsigned char log_sz 0x88 ;
> unsigned char log_stride 0x49 ;
> unsigned char log_frag_strides 0xaa ;
> } fbc;
> __be32 * db 0x1000000000010 ;
> short unsigned int sz 0xc ;
> short unsigned int wqe_ctr 0x0 ;
> short unsigned int cur_sz 0x0 ;
> } wq;
> unsigned int dma_fifo_mask 0xa1814500 ;
> struct mlx5e_sq_stats * stats 0xffff8881a33a0348 ;
> struct {
> struct mlx5e_sq_dma * dma_fifo 0x1a1814500 ;
> struct mlx5e_tx_wqe_info * wqe_info 0x14 ;
> } db;
> void * uar_map 0x0 ;
> struct netdev_queue * txq 0x0 ;
> unsigned int sqn 0x18c0 ;
> unsigned char min_inline_mode 0x0 ;
> struct device * pdev 0x0 ;
> unsigned int mkey_be 0x0 ;
> long unsigned int state 0x0 ;
> struct hwtstamp_config * tstamp 0x0 ;
> struct mlx5_clock * clock 0xffff8881b1aa6f88 ;
> struct mlx5_wq_ctrl {
> struct mlx5_core_dev * mdev 0x3f000003ff ;
> struct mlx5_frag_buf {
> struct mlx5_buf_list * frags 0x6060a ;
> int npages 0xa1814604 ;
> int size 0xffff8881 ;
> unsigned char page_shift 0x0 ;
> } buf;
> struct mlx5_db {
> __be32 * db 0xfff ;
> union {
> struct mlx5_db_pgdir * pgdir 0x0
> ;
> struct mlx5_ib_user_db_page *
> user_page 0x0 ;
> } u;
> long long unsigned int dma 0xffff888188
> 440000 ;
> int index 0x8b074000 ;
> } db;
> } wq_ctrl;
> struct mlx5e_channel * channel 0xffffc9000010d800 ;
> int txq_ix 0xa0020180 ;
txq_ix is too high to make any sense here.
> unsigned int rate_limit 0xffff8881 ;
> struct work_struct {
> struct {
> long int counter 0x1000018c0 ;
> } data;
> struct list_head {
> struct list_head * next 0xffff8881c32b68e8 ;
> struct list_head * prev 0x800 ;
> } entry;
> void (*func)(struct work_struct *) 0x9
> ;
> } recover_work;
> } ;
>
> Signed-off-by: Aya Levin <ayal@...lanox.com>
> ---
> .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 100
> +++++++++++++++++++++
> 1 file changed, 100 insertions(+)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> index 476dd97f7f2f..8a39f5525e57 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> @@ -9,6 +9,7 @@
>
> struct mlx5e_tx_err_ctx {
> int (*recover)(struct mlx5e_txqsq *sq);
> + int (*dump)(struct mlx5e_txqsq *sq);
> struct mlx5e_txqsq *sq;
> };
>
> @@ -281,10 +282,109 @@ static int mlx5e_tx_reporter_diagnose(struct
> devlink_health_reporter *reporter,
> return err;
> }
>
> +static int mlx5e_tx_reporter_sw_dump_from_ctx(struct mlx5e_priv
> *priv,
> + struct mlx5e_txqsq *sq,
> + struct devlink_fmsg
> *fmsg)
> +{
> + u64 *ptr = (u64 *)sq;
> + int copy, err;
> + int i = 0;
> +
> + if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
> + return 0;
> +
> + err = devlink_fmsg_pair_nest_start(fmsg, "mlx5e_txqsq");
> + if (err)
> + return err;
> +
> + err = devlink_fmsg_obj_nest_start(fmsg);
> + if (err)
> + return err;
> +
> + err = devlink_fmsg_arr_pair_nest_start(fmsg, "memory");
> + if (err)
> + return err;
> +
> + while (i < sizeof(struct mlx5e_txqsq)) {
> + copy = sizeof(u64);
> +
> + if (i + copy > sizeof(struct mlx5e_txqsq))
> + copy = sizeof(struct mlx5e_txqsq) - i;
> +
> + err = devlink_fmsg_binary_put(fmsg, ptr, copy);
> + if (err)
> + return err;
> + ptr++;
> + i += copy;
> + }
> +
> + err = devlink_fmsg_arr_pair_nest_end(fmsg);
> + if (err)
> + return err;
> +
> + err = devlink_fmsg_obj_nest_end(fmsg);
> + if (err)
> + return err;
> +
> + err = devlink_fmsg_pair_nest_end(fmsg);
> +
> + return err;
> +}
> +
> +static int mlx5e_tx_reporter_sw_dump_all(struct mlx5e_priv *priv,
> + struct devlink_fmsg *fmsg)
> +{
> + int i, err = 0;
> +
> + mutex_lock(&priv->state_lock);
> +
> + if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
> + goto unlock;
> +
> + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
> + if (err)
> + goto unlock;
> +
> + for (i = 0; i < priv->channels.num * priv-
> >channels.params.num_tc;
> + i++) {
> + err = devlink_fmsg_obj_nest_start(fmsg);
> + if (err)
> + goto unlock;
> +
> + err = mlx5e_tx_reporter_sw_dump_from_ctx(priv, priv-
> >txq2sq[i],
> + fmsg);
> + if (err)
> + goto unlock;
> +
> + err = devlink_fmsg_pair_nest_end(fmsg);
> + if (err)
> + goto unlock;
> + }
> + err = devlink_fmsg_arr_pair_nest_end(fmsg);
> + if (err)
> + goto unlock;
> +
> +unlock:
> + mutex_unlock(&priv->state_lock);
> + return err;
> +}
> +
> +static int mlx5e_tx_reporter_sw_dump(struct devlink_health_reporter
> *reporter,
> + struct devlink_fmsg *fmsg, void
> *context)
> +{
> + struct mlx5e_priv *priv =
> devlink_health_reporter_priv(reporter);
> + struct mlx5e_tx_err_ctx *err_ctx = context;
> +
> + return err_ctx ? mlx5e_tx_reporter_sw_dump_from_ctx(priv,
> err_ctx->sq,
> + fmsg) :
> + mlx5e_tx_reporter_sw_dump_all(priv, fmsg);
> +}
> +
> static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops
> = {
> .name = "tx",
> .recover = mlx5e_tx_reporter_recover,
> .diagnose = mlx5e_tx_reporter_diagnose,
> + .dump = mlx5e_tx_reporter_sw_dump,
> };
>
> #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
Powered by blists - more mailing lists