[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8b88e7b4-bf60-1944-8ffd-12dd3bab3fae@mellanox.com>
Date: Tue, 30 Apr 2019 11:13:21 +0000
From: Aya Levin <ayal@...lanox.com>
To: Saeed Mahameed <saeedm@...lanox.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC: Eran Ben Elisha <eranbe@...lanox.com>,
Jiri Pirko <jiri@...lanox.com>
Subject: Re: [PATCH net-next RFC] Dump SW SQ context as part of tx reporter
On 4/29/2019 9:32 PM, Saeed Mahameed wrote:
> On Mon, 2019-04-29 at 17:17 +0300, Aya Levin wrote:
>> TX reporter reports an error on two scenarios:
>> - TX timeout on a specific tx queue
>> - TX completion error on a specific send queue
>> Prior to this patch, no dump data was supported by the tx reporter.
>> This
>> patch adds support for SW data dump of the related SQ context. The
>> dump
>> is simply the SQ's raw memory snapshot taken right after the error
>> was
>> reported, before any recovery procedure was launched. With this
>> approach, no maintenance is needed as the driver fetches the actual
>> data
>> according to the layout on which the SQ was compiled with. By
>> providing
>> a SW context, one can easily debug error on a given SQ.
>>
>> In order to offline translate the raw memory into a human readable
>> format, the user can use some out-of-kernel scripts which receives as
>> an
>> input the following:
>> - Object raw memory
>> - Driver object compiled with debug info (can be taken/generated at
>> any time from the machine)
>> - Object name
>>
>> An example of such script output can be seen below.
>> Note: the script is not offered as part of this patch as it does not
>> belong to the kernel, I just described it in order to grasp the
>> general
>> idea of how/what can be fetched from SW dump via devlink health.
>>
>
> What was the script? Given a raw dump, how do you find which
> version it is, and the object name?
The script parses the pahole output of an object with debug info, from
it the script extracts the struct's layout and members offsets. In
addition it merges this with the raw memory given by devlink.
Since the analysis of the dump is done offline - the customer/vendor
should use the object file of the corresponding version which generated
the report.
>
> did you use any well known raw debug format, like DWARF ?
Yes, I use a standard debug format supported by pahole (in my case
DWARF). In addition to debug info I also set CONFIG_DEBUG_INFO_REDUCED=no
>
>> The output of the SW dump can be extracted by devlink health command:
>> $ sudo devlink health dump show pci/0000:00:0b.0 reporter tx.
>> mlx5e_txqsq: sqn: 6336
>> memory:
>> 00 00 00 00 00 00 00 00
>> 01 00 00 00 00 00 00 00
>> 00 00 00 00 00 00 00 00
>> 45 f4 88 cb 09 00 00 00
>> 00 00 00 00 00 00 00 00
>> 00 00 00 00 00 00 00 00
>> c0 ff ff ff 1f 00 00 00
>> f8 18 1e 89 81 88 ff ff
>> ...
>>
>> script output below, with struct members names and actual values:
>>
>> struct mlx5e_txqsq {
>> short unsigned int cc 0x5 ;
>> unsigned int dma_fifo_cc 0x5 ;
>> struct net_dim {
>> unsigned char state 0x1 ;
>> struct net_dim_stats {
>> int ppms 0x0 ;
>> int bpms 0x0 ;
>> int epms 0x0 ;
>> } prev_stats;
>> struct net_dim_sample {
>> long long int time 0x90766ef9d ;
>> unsigned int pkt_ctr 0x0 ;
>> unsigned int byte_ctr 0x0 ;
>> short unsigned int event_ctr 0x0 ;
>> } start_sample;
>> struct work_struct {
>> struct {
>> long int counter 0x1fffffffc0 ;
>> } data;
>> struct list_head {
>> struct list_head * next 0xffff8881b0
>> 8998f8 ;
>> struct list_head * prev 0xffff8881b0
>> 8998f8 ;
>> } entry;
>> void (*func)(struct work_struct *) 0xff
>> ffffffa02d0e30 ;
>> } work;
>> unsigned char profile_ix 0x60 ;
>> unsigned char mode 0x72 ;
>> unsigned char tune_state 0x35 ;
>> unsigned char steps_right 0xa0 ;
>> unsigned char steps_left 0xff ;
>> unsigned char tired 0xff ;
>> } dim;
>> short unsigned int pc 0x0 ;
>> unsigned int dma_fifo_pc 0x0 ;
>> struct mlx5e_cq {
>> struct mlx5_cqwq {
>> struct mlx5_frag_buf_ctrl {
>> struct mlx5_buf_list * frags 0x50
>> 0000005 ;
>> unsigned int sz_m1 0x0 ;
>> short unsigned int frag_sz_m1 0x0
>> ;
>> short unsigned int strides_offset
>> 0x0 ;
>> unsigned char log_sz 0x0 ;
>> unsigned char log_stride 0x0 ;
>> unsigned char log_frag_strides 0x0
>> ;
>> } fbc;
>> __be32 * db 0x0 ;
>> unsigned int cc 0x0 ;
>> } wq;
>> short unsigned int event_ctr 0x0 ;
>> struct napi_struct * napi 0x0 ;
>> struct mlx5_core_cq {
>> unsigned int cqn 0x0 ;
>> int cqe_sz 0x0 ;
>> __be32 * set_ci_db 0xffff8881b1aa4988 ;
>> __be32 * arm_db 0x3f000003ff ;
>> struct mlx5_uars_page * uar 0x6060a ;
>> struct refcount_struct {
>> struct {
>> int counter 0xa1814500 ;
>> } refs;
>> } refcount;
>> struct completion {
>> unsigned int done 0x5 ;
>> struct wait_queue_head {
>> struct spinlock {
>> union {
>> struct raw_spi
>> nlock {
>> struct
>> qspinlock {
>>
>> union {
>>
>> struct {
>>
>> int
>> counter 0x5 ;
>>
>> } val;
>>
>> struct {
>>
>> unsigned
>> char locked 0x5 ;
>>
>> unsigned
>> char pending 0x0 ;
>>
>> } ;
>>
>> struct {
>>
>> short unsigned
>> int locked_pending 0x5 ;
>>
>> short unsigned
>> int tail 0x0 ;
>>
>> } ;
>>
>> } ;
>> }
>> raw_lock;
>> } rlock;
>> } ;
>> } lock;
>> struct list_head {
>> struct list_head *
>> next 0xffff8881b089bb88 ;
>> struct list_head *
>> prev 0x4000000c0a ;
>> } head;
>> } wait;
>> } free;
>> unsigned int vector 0xa1814500 ;
>> unsigned int irqn 0xffff8881 ;
>> void (*comp)(struct mlx5_core_cq *)
>> 0xffff8881a1814504 ;
>> void (*event)(struct mlx5_core_cq *, enum
>> mlx5_event) 0xffff8881a2cdea08 ;
>> unsigned int cons_index 0x1 ;
>> unsigned int arm_sn 0x0 ;
>> struct mlx5_rsc_debug * dbg 0x0 ;
>> int pid 0x0 ;
>> struct {
>> struct list_head {
>> struct list_head * next 0xff
>> ffffff ;
>> struct list_head * prev 0xff
>> ffffffffffffff ;
>> } list;
>> void (*comp)(struct mlx5_core_cq *)
>> 0xffffffffa0356940 ;
>> void * priv 0x0 ;
>> } tasklet_ctx;
>> int reset_notify_added 0x0 ;
>> struct list_head {
>> struct list_head * next 0xffffffffa0
>> 300700 ;
>> struct list_head * prev 0xd ;
>> } reset_notify;
>> struct mlx5_eq_comp * eq 0x0 ;
>> short unsigned int uid 0x9a70 ;
>> } mcq;
>> struct mlx5e_channel * channel 0xffff8881b0899a70 ;
>> struct mlx5_core_dev * mdev 0x4800000001 ;
>> struct mlx5_wq_ctrl {
>> struct mlx5_core_dev * mdev 0xffffffffa0
>> 2d5350 ;
>> struct mlx5_frag_buf {
>> struct mlx5_buf_list * frags 0xff
>> ffffffa02d5460 ;
>> int npages 0x0 ;
>> int size 0x5 ;
>> unsigned char page_shift 0x8 ;
>> } buf;
>> struct mlx5_db {
>> __be32 * db 0x1c6 ;
>> union {
>> struct mlx5_db_pgdir * pgdir
>> 0x0 ;
>> struct mlx5_ib_user_db_page *
>> user_page 0x0 ;
>> } u;
>> long long unsigned int dma 0xff
>> ff8881b0899ab0 ;
>> int index 0x0 ;
>> } db;
>> } wq_ctrl;
>> } cq;
>> struct mlx5_wq_cyc {
>> struct mlx5_frag_buf_ctrl {
>> struct mlx5_buf_list * frags 0xffff8881a7
>> 600160 ;
>> unsigned int sz_m1 0xa7600160 ;
>> short unsigned int frag_sz_m1 0x8881 ;
>> short unsigned int strides_offset 0xff
>> ff ;
>> unsigned char log_sz 0x88 ;
>> unsigned char log_stride 0x49 ;
>> unsigned char log_frag_strides 0xaa ;
>> } fbc;
>> __be32 * db 0x1000000000010 ;
>> short unsigned int sz 0xc ;
>> short unsigned int wqe_ctr 0x0 ;
>> short unsigned int cur_sz 0x0 ;
>> } wq;
>> unsigned int dma_fifo_mask 0xa1814500 ;
>> struct mlx5e_sq_stats * stats 0xffff8881a33a0348 ;
>> struct {
>> struct mlx5e_sq_dma * dma_fifo 0x1a1814500 ;
>> struct mlx5e_tx_wqe_info * wqe_info 0x14 ;
>> } db;
>> void * uar_map 0x0 ;
>> struct netdev_queue * txq 0x0 ;
>> unsigned int sqn 0x18c0 ;
>> unsigned char min_inline_mode 0x0 ;
>> struct device * pdev 0x0 ;
>> unsigned int mkey_be 0x0 ;
>> long unsigned int state 0x0 ;
>> struct hwtstamp_config * tstamp 0x0 ;
>> struct mlx5_clock * clock 0xffff8881b1aa6f88 ;
>> struct mlx5_wq_ctrl {
>> struct mlx5_core_dev * mdev 0x3f000003ff ;
>> struct mlx5_frag_buf {
>> struct mlx5_buf_list * frags 0x6060a ;
>> int npages 0xa1814604 ;
>> int size 0xffff8881 ;
>> unsigned char page_shift 0x0 ;
>> } buf;
>> struct mlx5_db {
>> __be32 * db 0xfff ;
>> union {
>> struct mlx5_db_pgdir * pgdir 0x0
>> ;
>> struct mlx5_ib_user_db_page *
>> user_page 0x0 ;
>> } u;
>> long long unsigned int dma 0xffff888188
>> 440000 ;
>> int index 0x8b074000 ;
>> } db;
>> } wq_ctrl;
>> struct mlx5e_channel * channel 0xffffc9000010d800 ;
>> int txq_ix 0xa0020180 ;
>
> txq_ix is too high to make any sense here.
Thank you for noticing this - I found the bug in the script and fixed it
>
>
>> unsigned int rate_limit 0xffff8881 ;
>> struct work_struct {
>> struct {
>> long int counter 0x1000018c0 ;
>> } data;
>> struct list_head {
>> struct list_head * next 0xffff8881c32b68e8 ;
>> struct list_head * prev 0x800 ;
>> } entry;
>> void (*func)(struct work_struct *) 0x9
>> ;
>> } recover_work;
>> } ;
>>
>> Signed-off-by: Aya Levin <ayal@...lanox.com>
>> ---
>> .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 100
>> +++++++++++++++++++++
>> 1 file changed, 100 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> index 476dd97f7f2f..8a39f5525e57 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> @@ -9,6 +9,7 @@
>>
>> struct mlx5e_tx_err_ctx {
>> int (*recover)(struct mlx5e_txqsq *sq);
>> + int (*dump)(struct mlx5e_txqsq *sq);
>> struct mlx5e_txqsq *sq;
>> };
>>
>> @@ -281,10 +282,109 @@ static int mlx5e_tx_reporter_diagnose(struct
>> devlink_health_reporter *reporter,
>> return err;
>> }
>>
>> +static int mlx5e_tx_reporter_sw_dump_from_ctx(struct mlx5e_priv
>> *priv,
>> + struct mlx5e_txqsq *sq,
>> + struct devlink_fmsg
>> *fmsg)
>> +{
>> + u64 *ptr = (u64 *)sq;
>> + int copy, err;
>> + int i = 0;
>> +
>> + if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
>> + return 0;
>> +
>> + err = devlink_fmsg_pair_nest_start(fmsg, "mlx5e_txqsq");
>> + if (err)
>> + return err;
>> +
>> + err = devlink_fmsg_obj_nest_start(fmsg);
>> + if (err)
>> + return err;
>> +
>> + err = devlink_fmsg_arr_pair_nest_start(fmsg, "memory");
>> + if (err)
>> + return err;
>> +
>> + while (i < sizeof(struct mlx5e_txqsq)) {
>> + copy = sizeof(u64);
>> +
>> + if (i + copy > sizeof(struct mlx5e_txqsq))
>> + copy = sizeof(struct mlx5e_txqsq) - i;
>> +
>> + err = devlink_fmsg_binary_put(fmsg, ptr, copy);
>> + if (err)
>> + return err;
>> + ptr++;
>> + i += copy;
>> + }
>> +
>> + err = devlink_fmsg_arr_pair_nest_end(fmsg);
>> + if (err)
>> + return err;
>> +
>> + err = devlink_fmsg_obj_nest_end(fmsg);
>> + if (err)
>> + return err;
>> +
>> + err = devlink_fmsg_pair_nest_end(fmsg);
>> +
>> + return err;
>> +}
>> +
>> +static int mlx5e_tx_reporter_sw_dump_all(struct mlx5e_priv *priv,
>> + struct devlink_fmsg *fmsg)
>> +{
>> + int i, err = 0;
>> +
>> + mutex_lock(&priv->state_lock);
>> +
>> + if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
>> + goto unlock;
>> +
>> + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
>> + if (err)
>> + goto unlock;
>> +
>> + for (i = 0; i < priv->channels.num * priv-
>>> channels.params.num_tc;
>> + i++) {
>> + err = devlink_fmsg_obj_nest_start(fmsg);
>> + if (err)
>> + goto unlock;
>> +
>> + err = mlx5e_tx_reporter_sw_dump_from_ctx(priv, priv-
>>> txq2sq[i],
>> + fmsg);
>> + if (err)
>> + goto unlock;
>> +
>> + err = devlink_fmsg_pair_nest_end(fmsg);
>> + if (err)
>> + goto unlock;
>> + }
>> + err = devlink_fmsg_arr_pair_nest_end(fmsg);
>> + if (err)
>> + goto unlock;
>> +
>> +unlock:
>> + mutex_unlock(&priv->state_lock);
>> + return err;
>> +}
>> +
>> +static int mlx5e_tx_reporter_sw_dump(struct devlink_health_reporter
>> *reporter,
>> + struct devlink_fmsg *fmsg, void
>> *context)
>> +{
>> + struct mlx5e_priv *priv =
>> devlink_health_reporter_priv(reporter);
>> + struct mlx5e_tx_err_ctx *err_ctx = context;
>> +
>> + return err_ctx ? mlx5e_tx_reporter_sw_dump_from_ctx(priv,
>> err_ctx->sq,
>> + fmsg) :
>> + mlx5e_tx_reporter_sw_dump_all(priv, fmsg);
>> +}
>> +
>> static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops
>> = {
>> .name = "tx",
>> .recover = mlx5e_tx_reporter_recover,
>> .diagnose = mlx5e_tx_reporter_diagnose,
>> + .dump = mlx5e_tx_reporter_sw_dump,
>> };
>>
>> #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
Powered by blists - more mailing lists