lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 29 Apr 2019 18:32:45 +0000
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Aya Levin <ayal@...lanox.com>
CC:     Eran Ben Elisha <eranbe@...lanox.com>,
        Jiri Pirko <jiri@...lanox.com>
Subject: Re: [PATCH net-next RFC] Dump SW SQ context as part of tx reporter

On Mon, 2019-04-29 at 17:17 +0300, Aya Levin wrote:
> TX reporter reports an error on two scenarios:
> - TX timeout on a specific tx queue
> - TX completion error on a specific send queue
> Prior to this patch, no dump data was supported by the tx reporter.
> This
> patch adds support for SW data dump of the related SQ context. The
> dump
> is simply the SQ's raw memory snapshot taken right after the error
> was
> reported, before any recovery procedure was launched. With this
> approach, no maintenance is needed, as the driver fetches the actual
> data
> according to the layout with which the SQ was compiled.  By
> providing
> a SW context, one can easily debug an error on a given SQ.
> 
> In order to offline translate the raw memory into a human readable
> format, the user can use some out-of-kernel scripts which receives as
> an
> input the following:
> - Object raw memory
> - Driver object compiled with debug info (can be taken/generated at
> any time from the machine)
> - Object name
> 
> An example of such script output can be seen below.
> Note: the script is not offered as part of this patch as it does not
> belong to the kernel; I just described it in order to convey the
> general
> idea of how/what can be fetched from SW dump via devlink health.
> 

What was the script? Given a raw dump, how do you find which
version it is, and the object name?

Did you use any well-known raw debug format, like DWARF?

> The output of the SW dump can be extracted by devlink health command:
> $ sudo devlink health dump show pci/0000:00:0b.0 reporter tx.
>  mlx5e_txqsq: sqn: 6336
>  memory:
>    00 00 00 00 00 00 00 00
>    01 00 00 00 00 00 00 00
>    00 00 00 00 00 00 00 00
>    45 f4 88 cb 09 00 00 00
>    00 00 00 00 00 00 00 00
>    00 00 00 00 00 00 00 00
>    c0 ff ff ff 1f 00 00 00
>    f8 18 1e 89 81 88 ff ff
>    ...
> 
> script output below, with struct members names and actual values:
> 
> struct  mlx5e_txqsq {
> 	short unsigned int         cc 	 0x5 ;
> 	unsigned int               dma_fifo_cc 	 0x5 ;
> 	struct  net_dim {
> 		unsigned char      state 	 0x1 ;
> 		struct  net_dim_stats {
> 			int        ppms 	 0x0 ;
> 			int        bpms 	 0x0 ;
> 			int        epms 	 0x0 ;
> 		} prev_stats;
> 		struct  net_dim_sample {
> 			long long int time 	 0x90766ef9d ;
> 			unsigned int pkt_ctr 	 0x0 ;
> 			unsigned int byte_ctr 	 0x0 ;
> 			short unsigned int event_ctr 	 0x0 ;
> 		} start_sample;
> 		struct  work_struct {
> 			struct   {
> 				long int counter 	 0x1fffffffc0 ;
> 			} data;
> 			struct  list_head {
> 				struct list_head * next 	 0xffff8881b0
> 8998f8 ;
> 				struct list_head * prev 	 0xffff8881b0
> 8998f8 ;
> 			} entry;
> 			void       (*func)(struct work_struct *) 	 0xff
> ffffffa02d0e30 ;
> 		} work;
> 		unsigned char      profile_ix 	 0x60 ;
> 		unsigned char      mode 	 0x72 ;
> 		unsigned char      tune_state 	 0x35 ;
> 		unsigned char      steps_right 	 0xa0 ;
> 		unsigned char      steps_left 	 0xff ;
> 		unsigned char      tired 	 0xff ;
> 	} dim;
> 	short unsigned int         pc 	 0x0 ;
> 	unsigned int               dma_fifo_pc 	 0x0 ;
> 	struct  mlx5e_cq {
> 		struct  mlx5_cqwq {
> 			struct  mlx5_frag_buf_ctrl {
> 				struct mlx5_buf_list * frags 	 0x50
> 0000005 ;
> 				unsigned int sz_m1 	 0x0 ;
> 				short unsigned int frag_sz_m1 	 0x0
> ;
> 				short unsigned int strides_offset 	
>  0x0 ;
> 				unsigned char log_sz 	 0x0 ;
> 				unsigned char log_stride 	 0x0 ;
> 				unsigned char log_frag_strides 	 0x0
> ;
> 			} fbc;
> 			__be32 *   db 	 0x0 ;
> 			unsigned int cc 	 0x0 ;
> 		} wq;
> 		short unsigned int event_ctr 	 0x0 ;
> 		struct napi_struct * napi 	 0x0 ;
> 		struct  mlx5_core_cq {
> 			unsigned int cqn 	 0x0 ;
> 			int        cqe_sz 	 0x0 ;
> 			__be32 *   set_ci_db 	 0xffff8881b1aa4988 ;
> 			__be32 *   arm_db 	 0x3f000003ff ;
> 			struct mlx5_uars_page * uar 	 0x6060a ;
> 			struct  refcount_struct {
> 				struct   {
> 					int    counter 	 0xa1814500 ;
> 				} refs;
> 			} refcount;
> 			struct  completion {
> 				unsigned int done 	 0x5 ;
> 				struct  wait_queue_head {
> 					struct  spinlock {
> 						union   {
> 							struct  raw_spi
> nlock {
> 								struct 
>  qspinlock {
> 									
> union   {
> 									
> 	struct   {
> 									
> 		int                                                    
> counter 	 0x5 ;
> 									
> 	} val;
> 									
> 	struct   {
> 									
> 		unsigned
> char                                          locked 	 0x5 ;
> 									
> 		unsigned
> char                                          pending 	 0x0 ;
> 									
> 	} ;
> 									
> 	struct   {
> 									
> 		short unsigned
> int                                     locked_pending 	 0x5 ;
> 									
> 		short unsigned
> int                                     tail 	 0x0 ;
> 									
> 	} ;
> 									
> } ;
> 								}
> raw_lock;
> 							} rlock;
> 						} ;
> 					} lock;
> 					struct  list_head {
> 						struct list_head *
> next 	 0xffff8881b089bb88 ;
> 						struct list_head *
> prev 	 0x4000000c0a ;
> 					} head;
> 				} wait;
> 			} free;
> 			unsigned int vector 	 0xa1814500 ;
> 			unsigned int irqn 	 0xffff8881 ;
> 			void       (*comp)(struct mlx5_core_cq *) 	
>  0xffff8881a1814504 ;
> 			void       (*event)(struct mlx5_core_cq *, enum
> mlx5_event) 	 0xffff8881a2cdea08 ;
> 			unsigned int cons_index 	 0x1 ;
> 			unsigned int arm_sn 	 0x0 ;
> 			struct mlx5_rsc_debug * dbg 	 0x0 ;
> 			int        pid 	 0x0 ;
> 			struct   {
> 				struct  list_head {
> 					struct list_head * next 	 0xff
> ffffff ;
> 					struct list_head * prev 	 0xff
> ffffffffffffff ;
> 				} list;
> 				void (*comp)(struct mlx5_core_cq *) 	
>  0xffffffffa0356940 ;
> 				void * priv 	 0x0 ;
> 			} tasklet_ctx;
> 			int        reset_notify_added 	 0x0 ;
> 			struct  list_head {
> 				struct list_head * next 	 0xffffffffa0
> 300700 ;
> 				struct list_head * prev 	 0xd ;
> 			} reset_notify;
> 			struct mlx5_eq_comp * eq 	 0x0 ;
> 			short unsigned int uid 	 0x9a70 ;
> 		} mcq;
> 		struct mlx5e_channel * channel 	 0xffff8881b0899a70 ;
> 		struct mlx5_core_dev * mdev 	 0x4800000001 ;
> 		struct  mlx5_wq_ctrl {
> 			struct mlx5_core_dev * mdev 	 0xffffffffa0
> 2d5350 ;
> 			struct  mlx5_frag_buf {
> 				struct mlx5_buf_list * frags 	 0xff
> ffffffa02d5460 ;
> 				int npages 	 0x0 ;
> 				int size 	 0x5 ;
> 				unsigned char page_shift 	 0x8 ;
> 			} buf;
> 			struct  mlx5_db {
> 				__be32 * db 	 0x1c6 ;
> 				union   {
> 					struct mlx5_db_pgdir * pgdir 	
>  0x0 ;
> 					struct mlx5_ib_user_db_page *
> user_page 	 0x0 ;
> 				} u;
> 				long long unsigned int dma 	 0xff
> ff8881b0899ab0 ;
> 				int index 	 0x0 ;
> 			} db;
> 		} wq_ctrl;
> 	} cq;
> 	struct  mlx5_wq_cyc {
> 		struct  mlx5_frag_buf_ctrl {
> 			struct mlx5_buf_list * frags 	 0xffff8881a7
> 600160 ;
> 			unsigned int sz_m1 	 0xa7600160 ;
> 			short unsigned int frag_sz_m1 	 0x8881 ;
> 			short unsigned int strides_offset 	 0xff
> ff ;
> 			unsigned char log_sz 	 0x88 ;
> 			unsigned char log_stride 	 0x49 ;
> 			unsigned char log_frag_strides 	 0xaa ;
> 		} fbc;
> 		__be32 *           db 	 0x1000000000010 ;
> 		short unsigned int sz 	 0xc ;
> 		short unsigned int wqe_ctr 	 0x0 ;
> 		short unsigned int cur_sz 	 0x0 ;
> 	} wq;
> 	unsigned int               dma_fifo_mask 	 0xa1814500 ;
> 	struct mlx5e_sq_stats *    stats 	 0xffff8881a33a0348 ;
> 	struct   {
> 		struct mlx5e_sq_dma * dma_fifo 	 0x1a1814500 ;
> 		struct mlx5e_tx_wqe_info * wqe_info 	 0x14 ;
> 	} db;
> 	void *                     uar_map 	 0x0 ;
> 	struct netdev_queue *      txq 	 0x0 ;
> 	unsigned int               sqn 	 0x18c0 ;
> 	unsigned char              min_inline_mode 	 0x0 ;
> 	struct device *            pdev 	 0x0 ;
> 	unsigned int               mkey_be 	 0x0 ;
> 	long unsigned int          state 	 0x0 ;
> 	struct hwtstamp_config *   tstamp 	 0x0 ;
> 	struct mlx5_clock *        clock 	 0xffff8881b1aa6f88 ;
> 	struct  mlx5_wq_ctrl {
> 		struct mlx5_core_dev * mdev 	 0x3f000003ff ;
> 		struct  mlx5_frag_buf {
> 			struct mlx5_buf_list * frags 	 0x6060a ;
> 			int        npages 	 0xa1814604 ;
> 			int        size 	 0xffff8881 ;
> 			unsigned char page_shift 	 0x0 ;
> 		} buf;
> 		struct  mlx5_db {
> 			__be32 *   db 	 0xfff ;
> 			union   {
> 				struct mlx5_db_pgdir * pgdir 	 0x0
> ;
> 				struct mlx5_ib_user_db_page *
> user_page 	 0x0 ;
> 			} u;
> 			long long unsigned int dma 	 0xffff888188
> 440000 ;
> 			int        index 	 0x8b074000 ;
> 		} db;
> 	} wq_ctrl;
> 	struct mlx5e_channel *     channel 	 0xffffc9000010d800 ;
> 	int                        txq_ix 	 0xa0020180 ;

txq_ix is too high to make any sense here.


> 	unsigned int               rate_limit 	 0xffff8881 ;
> 	struct  work_struct {
> 		struct   {
> 			long int   counter 	 0x1000018c0 ;
> 		} data;
> 		struct  list_head {
> 			struct list_head * next 	 0xffff8881c32b68e8 ;
> 			struct list_head * prev 	 0x800 ;
> 		} entry;
> 		void               (*func)(struct work_struct *) 	 0x9
> ;
> 	} recover_work;
> } ;
> 
> Signed-off-by: Aya Levin <ayal@...lanox.com>
> ---
>  .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   | 100
> +++++++++++++++++++++
>  1 file changed, 100 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> index 476dd97f7f2f..8a39f5525e57 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
> @@ -9,6 +9,7 @@
>  
>  struct mlx5e_tx_err_ctx {
>  	int (*recover)(struct mlx5e_txqsq *sq);
> +	int (*dump)(struct mlx5e_txqsq *sq);
>  	struct mlx5e_txqsq *sq;
>  };
>  
> @@ -281,10 +282,109 @@ static int mlx5e_tx_reporter_diagnose(struct
> devlink_health_reporter *reporter,
>  	return err;
>  }
>  
> +static int mlx5e_tx_reporter_sw_dump_from_ctx(struct mlx5e_priv
> *priv,
> +					      struct mlx5e_txqsq *sq,
> +					      struct devlink_fmsg
> *fmsg)
> +{
> +	u64 *ptr = (u64 *)sq;
> +	int copy, err;
> +	int i = 0;
> +
> +	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
> +		return 0;
> +
> +	err = devlink_fmsg_pair_nest_start(fmsg, "mlx5e_txqsq");
> +	if (err)
> +		return err;
> +
> +	err = devlink_fmsg_obj_nest_start(fmsg);
> +	if (err)
> +		return err;
> +
> +	err = devlink_fmsg_arr_pair_nest_start(fmsg, "memory");
> +	if (err)
> +		return err;
> +
> +	while (i < sizeof(struct mlx5e_txqsq)) {
> +		copy = sizeof(u64);
> +
> +		if (i + copy > sizeof(struct mlx5e_txqsq))
> +			copy = sizeof(struct mlx5e_txqsq) - i;
> +
> +		err = devlink_fmsg_binary_put(fmsg, ptr, copy);
> +		if (err)
> +			return err;
> +		ptr++;
> +		i += copy;
> +	}
> +
> +	err = devlink_fmsg_arr_pair_nest_end(fmsg);
> +	if (err)
> +		return err;
> +
> +	err = devlink_fmsg_obj_nest_end(fmsg);
> +	if (err)
> +		return err;
> +
> +	err = devlink_fmsg_pair_nest_end(fmsg);
> +
> +	return err;
> +}
> +
> +static int mlx5e_tx_reporter_sw_dump_all(struct mlx5e_priv *priv,
> +					 struct devlink_fmsg *fmsg)
> +{
> +	int i, err = 0;
> +
> +	mutex_lock(&priv->state_lock);
> +
> +	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
> +		goto unlock;
> +
> +	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
> +	if (err)
> +		goto unlock;
> +
> +	for (i = 0; i < priv->channels.num * priv-
> >channels.params.num_tc;
> +	     i++) {
> +		err = devlink_fmsg_obj_nest_start(fmsg);
> +		if (err)
> +			goto unlock;
> +
> +		err = mlx5e_tx_reporter_sw_dump_from_ctx(priv, priv-
> >txq2sq[i],
> +							 fmsg);
> +		if (err)
> +			goto unlock;
> +
> +		err = devlink_fmsg_pair_nest_end(fmsg);
> +		if (err)
> +			goto unlock;
> +	}
> +	err = devlink_fmsg_arr_pair_nest_end(fmsg);
> +	if (err)
> +		goto unlock;
> +
> +unlock:
> +	mutex_unlock(&priv->state_lock);
> +	return err;
> +}
> +
> +static int mlx5e_tx_reporter_sw_dump(struct devlink_health_reporter
> *reporter,
> +				     struct devlink_fmsg *fmsg, void
> *context)
> +{
> +	struct mlx5e_priv *priv =
> devlink_health_reporter_priv(reporter);
> +	struct mlx5e_tx_err_ctx *err_ctx = context;
> +
> +	return err_ctx ? mlx5e_tx_reporter_sw_dump_from_ctx(priv,
> err_ctx->sq,
> +							    fmsg) :
> +			 mlx5e_tx_reporter_sw_dump_all(priv, fmsg);
> +}
> +
>  static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops
> = {
>  		.name = "tx",
>  		.recover = mlx5e_tx_reporter_recover,
>  		.diagnose = mlx5e_tx_reporter_diagnose,
> +		.dump = mlx5e_tx_reporter_sw_dump,
>  };
>  
>  #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500

Powered by blists - more mailing lists