lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220307142452.70c95fd1@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com>
Date:   Mon, 7 Mar 2022 14:24:52 -0800
From:   Jakub Kicinski <kuba@...nel.org>
To:     Michael Chan <michael.chan@...adcom.com>,
        Jiri Pirko <jiri@...nulli.us>
Cc:     davem@...emloft.net, netdev@...r.kernel.org, gospo@...adcom.com,
        eranbe@...dia.com
Subject: Re: [PATCH net-next 9/9] bnxt_en: add an nvm test for hw diagnose

On Sat,  5 Mar 2022 03:54:42 -0500 Michael Chan wrote:
> From: Vikas Gupta <vikas.gupta@...adcom.com>
> 
> Add an NVM test function for devlink hw reporter.
> In this function an NVM VPD area is read followed by
> a write. Test result is cached and if it is successful then
> the next test can be conducted only after HW_RETEST_MIN_TIME to
> avoid frequent writes to the NVM.

You seem to execute a self-test from the .diagnose callback.
That really seems like an abuse of the API. It's not hard to
add a separate self-test callback.

Jiri, WDYT?

> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> index fa0df43ddc1a..9dd878def3c2 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> @@ -1544,17 +1544,29 @@ struct bnxt_ctx_mem_info {
>  };
>  
>  enum bnxt_hw_err {
> -	BNXT_HW_STATUS_HEALTHY		= 0x0,
> -	BNXT_HW_STATUS_NVM_WRITE_ERR	= 0x1,
> -	BNXT_HW_STATUS_NVM_ERASE_ERR	= 0x2,
> -	BNXT_HW_STATUS_NVM_UNKNOWN_ERR	= 0x3,
> +	BNXT_HW_STATUS_HEALTHY			= 0x0,
> +	BNXT_HW_STATUS_NVM_WRITE_ERR		= 0x1,
> +	BNXT_HW_STATUS_NVM_ERASE_ERR		= 0x2,
> +	BNXT_HW_STATUS_NVM_UNKNOWN_ERR		= 0x3,
> +	BNXT_HW_STATUS_NVM_TEST_VPD_ENT_ERR	= 0x4,
> +	BNXT_HW_STATUS_NVM_TEST_VPD_READ_ERR	= 0x5,
> +	BNXT_HW_STATUS_NVM_TEST_VPD_WRITE_ERR	= 0x6,
> +	BNXT_HW_STATUS_NVM_TEST_INCMPL_ERR	= 0x7,
>  };
>  
>  struct bnxt_hw_health {
>  	u32 nvm_err_address;
>  	u32 nvm_write_errors;
>  	u32 nvm_erase_errors;
> +	u32 nvm_test_vpd_ent_errors;
> +	u32 nvm_test_vpd_read_errors;
> +	u32 nvm_test_vpd_write_errors;
> +	u32 nvm_test_incmpl_errors;
>  	u8 synd;
> +	/* max a test in a day if previous test was successful */
> +#define HW_RETEST_MIN_TIME	(1000 * 3600 * 24)
> +	u8 nvm_test_result;
> +	unsigned long nvm_test_timestamp;
>  	struct devlink_health_reporter *hw_reporter;
>  };
>  
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
> index a802bbda1c27..77e55105d645 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
> @@ -20,6 +20,7 @@
>  #include "bnxt_ulp.h"
>  #include "bnxt_ptp.h"
>  #include "bnxt_coredump.h"
> +#include "bnxt_nvm_defs.h"	/* NVRAM content constant and structure defs */
>  
>  static void __bnxt_fw_recover(struct bnxt *bp)
>  {
> @@ -263,20 +264,82 @@ static const char *hw_err_str(u8 synd)
>  		return "nvm erase error";
>  	case BNXT_HW_STATUS_NVM_UNKNOWN_ERR:
>  		return "unrecognized nvm error";
> +	case BNXT_HW_STATUS_NVM_TEST_VPD_ENT_ERR:
> +		return "nvm test vpd entry error";
> +	case BNXT_HW_STATUS_NVM_TEST_VPD_READ_ERR:
> +		return "nvm test vpd read error";
> +	case BNXT_HW_STATUS_NVM_TEST_VPD_WRITE_ERR:
> +		return "nvm test vpd write error";
> +	case BNXT_HW_STATUS_NVM_TEST_INCMPL_ERR:
> +		return "nvm test incomplete error";
>  	default:
>  		return "unknown hw error";
>  	}
>  }
>  
> +static void bnxt_nvm_test(struct bnxt *bp)
> +{
> +	struct bnxt_hw_health *h = &bp->hw_health;
> +	u32 datalen;
> +	u16 index;
> +	u8 *buf;
> +
> +	if (!h->nvm_test_result) {
> +		if (!h->nvm_test_timestamp ||
> +		    time_after(jiffies, h->nvm_test_timestamp +
> +					msecs_to_jiffies(HW_RETEST_MIN_TIME)))
> +			h->nvm_test_timestamp = jiffies;
> +		else
> +			return;
> +	}
> +
> +	if (bnxt_find_nvram_item(bp->dev, BNX_DIR_TYPE_VPD,
> +				 BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
> +				 &index, NULL, &datalen) || !datalen) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_VPD_ENT_ERR;
> +		h->nvm_test_vpd_ent_errors++;
> +		return;
> +	}
> +
> +	buf = kzalloc(datalen, GFP_KERNEL);
> +	if (!buf) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_INCMPL_ERR;
> +		h->nvm_test_incmpl_errors++;
> +		return;
> +	}
> +
> +	if (bnxt_get_nvram_item(bp->dev, index, 0, datalen, buf)) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_VPD_READ_ERR;
> +		h->nvm_test_vpd_read_errors++;
> +		goto err;
> +	}
> +
> +	if (bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_VPD, BNX_DIR_ORDINAL_FIRST,
> +			     BNX_DIR_EXT_NONE, 0, 0, buf, datalen)) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_VPD_WRITE_ERR;
> +		h->nvm_test_vpd_write_errors++;
> +	}
> +
> +err:
> +	kfree(buf);
> +}
> +
>  static int bnxt_hw_diagnose(struct devlink_health_reporter *reporter,
>  			    struct devlink_fmsg *fmsg,
>  			    struct netlink_ext_ack *extack)
>  {
>  	struct bnxt *bp = devlink_health_reporter_priv(reporter);
>  	struct bnxt_hw_health *h = &bp->hw_health;
> +	u8 synd = h->synd;
>  	int rc;
>  
> -	rc = devlink_fmsg_string_pair_put(fmsg, "Status", hw_err_str(h->synd));
> +	bnxt_nvm_test(bp);
> +	if (h->nvm_test_result) {
> +		synd = h->nvm_test_result;
> +		devlink_health_report(h->hw_reporter, hw_err_str(synd), NULL);
> +	}
> +
> +	rc = devlink_fmsg_string_pair_put(fmsg, "Status", hw_err_str(synd));
>  	if (rc)
>  		return rc;
>  	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_write_errors", h->nvm_write_errors);
> @@ -285,6 +348,23 @@ static int bnxt_hw_diagnose(struct devlink_health_reporter *reporter,
>  	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_erase_errors", h->nvm_erase_errors);
>  	if (rc)
>  		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_vpd_ent_errors",
> +				       h->nvm_test_vpd_ent_errors);
> +	if (rc)
> +		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_vpd_read_errors",
> +				       h->nvm_test_vpd_read_errors);
> +	if (rc)
> +		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_vpd_write_errors",
> +				       h->nvm_test_vpd_write_errors);
> +	if (rc)
> +		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_incomplete_errors",
> +				       h->nvm_test_incmpl_errors);
> +	if (rc)
> +		return rc;
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> index eadaca42ed96..178074795b27 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> @@ -2168,14 +2168,10 @@ static void bnxt_print_admin_err(struct bnxt *bp)
>  	netdev_info(bp->dev, "PF does not have admin privileges to flash or reset the device\n");
>  }
>  
> -static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
> -				u16 ext, u16 *index, u32 *item_length,
> -				u32 *data_length);
> -
> -static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
> -			    u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
> -			    u32 dir_item_len, const u8 *data,
> -			    size_t data_len)
> +int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
> +		     u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
> +		     u32 dir_item_len, const u8 *data,
> +		     size_t data_len)
>  {
>  	struct bnxt *bp = netdev_priv(dev);
>  	struct hwrm_nvm_write_input *req;
> @@ -2819,8 +2815,8 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
>  	return rc;
>  }
>  
> -static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
> -			       u32 length, u8 *data)
> +int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
> +			u32 length, u8 *data)
>  {
>  	struct bnxt *bp = netdev_priv(dev);
>  	int rc;
> @@ -2854,9 +2850,9 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
>  	return rc;
>  }
>  
> -static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
> -				u16 ext, u16 *index, u32 *item_length,
> -				u32 *data_length)
> +int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
> +			 u16 ext, u16 *index, u32 *item_length,
> +			 u32 *data_length)
>  {
>  	struct hwrm_nvm_find_dir_entry_output *output;
>  	struct hwrm_nvm_find_dir_entry_input *req;
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
> index 6aa44840f13a..2593e0049582 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
> @@ -56,6 +56,13 @@ int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
>  int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw,
>  				   u32 install_type);
>  int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size);
> +int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, u16 ext,
> +			 u16 *index, u32 *item_length, u32 *data_length);
> +int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
> +			u32 length, u8 *data);
> +int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, u16 dir_ordinal,
> +		     u16 dir_ext, u16 dir_attr, u32 dir_item_len,
> +		     const u8 *data, size_t data_len);
>  void bnxt_ethtool_init(struct bnxt *bp);
>  void bnxt_ethtool_free(struct bnxt *bp);
>  

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ