Message-ID: <PH0PR21MB302599C693F0A48F3777008ED7DF9@PH0PR21MB3025.namprd21.prod.outlook.com>
Date:   Wed, 1 Jun 2022 15:56:59 +0000
From:   "Michael Kelley (LINUX)" <mikelley@...rosoft.com>
To:     Christoph Hellwig <hch@....de>
CC:     "kbusch@...nel.org" <kbusch@...nel.org>,
        "axboe@...com" <axboe@...com>,
        "sagi@...mberg.me" <sagi@...mberg.me>,
        "linux-nvme@...ts.infradead.org" <linux-nvme@...ts.infradead.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Caroline Subramoney <Caroline.Subramoney@...rosoft.com>,
        Richard Wurdack <riwurd@...rosoft.com>,
        Nathan Obr <Nathan.Obr@...rosoft.com>
Subject: RE: [PATCH 2/2] nvme-pci: handle persistent internal error AER from NVMe controller

From: Christoph Hellwig <hch@....de>
> 
> This really belongs into common code.  See the untested patch below
> for how I'd do it.  The nvme_should_reset change would again be a
> separate prep patch.

Indeed, that makes sense.  I had missed that execution gets from
the common code back to the PCI-specific code via the reset_work
function, so I unnecessarily did everything in pci.c.
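
For reference, the path I had missed: nvme_reset_ctrl() in core.c just
changes the controller state and queues ctrl->reset_work, and pci.c
wires that work item up to its own handler.  Roughly (abbreviated and
from memory, so a sketch rather than the exact current source):

	/* drivers/nvme/host/core.c */
	int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
	{
		if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
			return -EBUSY;
		if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
			return -EBUSY;
		return 0;
	}

	/* drivers/nvme/host/pci.c, in nvme_probe() */
	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);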

One follow-up: if a persistent internal error triggers a controller
reset, it looks like we should *not* queue async_event_work at the
end of nvme_complete_async_event().  The reset path will submit a
fresh AER on the admin queue, so presumably we don't want
nvme_async_event_work() to also try to submit one; whether that
extra submission succeeds would depend on the timing of when the
controller state shows LIVE again.
Agreed?
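
Concretely, something like this (an untested sketch on top of your
patch) is what I have in mind: let nvme_handle_aen_persistent_error()
report whether it actually scheduled a reset, and return early so we
skip the queue_work(nvme_wq, &ctrl->async_event_work) at the bottom
of nvme_complete_async_event():

	static bool nvme_handle_aen_persistent_error(struct nvme_ctrl *ctrl)
	{
		u32 csts;

		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts) < 0 ||
		    nvme_should_reset(ctrl, csts)) {
			dev_warn(ctrl->device, "resetting due to AEN\n");
			nvme_reset_ctrl(ctrl);
			return true;
		}
		return false;
	}

	...
	case NVME_AER_ERROR:
		if (aen_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
			/* the reset path submits a fresh AER itself */
			if (nvme_handle_aen_persistent_error(ctrl))
				return;
			break;
		}
		fallthrough;
	...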

Michael

> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 72f7c955c7078..b8b8e9ee04120 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -171,6 +171,24 @@ static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
>  	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
>  }
> 
> +bool nvme_should_reset(struct nvme_ctrl *ctrl, u32 csts)
> +{
> +	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
> +	switch (ctrl->state) {
> +	case NVME_CTRL_RESETTING:
> +	case NVME_CTRL_CONNECTING:
> +		return false;
> +	default:
> +		break;
> +	}
> +
> +	/*
> +	 * We shouldn't reset unless the controller is in a fatal error state
> +	 * or we have lost communication with it.
> +	 */
> +	return (csts & NVME_CSTS_CFS) ||
> +		(ctrl->subsystem && (csts & NVME_CSTS_NSSRO));
> +}
> 
>  int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
>  {
> @@ -4537,24 +4555,41 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
>  	}
>  }
> 
> +static void nvme_handle_aen_persistent_error(struct nvme_ctrl *ctrl)
> +{
> +	u32 csts;
> +
> +	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts) < 0 ||
> +	    nvme_should_reset(ctrl, csts)) {
> +		dev_warn(ctrl->device, "resetting due to AEN\n");
> +		nvme_reset_ctrl(ctrl);
> +	}
> +}
> +
>  void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>  		volatile union nvme_result *res)
>  {
>  	u32 result = le32_to_cpu(res->u32);
> -	u32 aer_type = result & 0x07;
> +	u32 aen_type = result & 0x07;
> +	u32 aen_subtype = (result & 0xff00) >> 8;
> 
>  	if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
>  		return;
> 
> -	switch (aer_type) {
> +	switch (aen_type) {
>  	case NVME_AER_NOTICE:
>  		nvme_handle_aen_notice(ctrl, result);
>  		break;
>  	case NVME_AER_ERROR:
> +		if (aen_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
> +			nvme_handle_aen_persistent_error(ctrl);
> +			break;
> +		}
> +		fallthrough;
>  	case NVME_AER_SMART:
>  	case NVME_AER_CSS:
>  	case NVME_AER_VS:
> -		trace_nvme_async_event(ctrl, aer_type);
> +		trace_nvme_async_event(ctrl, aen_type);
>  		ctrl->aen_result = result;
>  		break;
>  	default:
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9b72b6ecf33c9..0d7e9ac52d25a 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -762,6 +762,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
>  		      u32 *result);
>  int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
>  void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
> +bool nvme_should_reset(struct nvme_ctrl *ctrl, u32 csts);
>  int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
>  int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
>  int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 5a98a7de09642..c57023d98f8f3 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -1293,31 +1293,6 @@ static void abort_endio(struct request *req, blk_status_t error)
>  	blk_mq_free_request(req);
>  }
> 
> -static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
> -{
> -	/* If true, indicates loss of adapter communication, possibly by a
> -	 * NVMe Subsystem reset.
> -	 */
> -	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
> -
> -	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
> -	switch (dev->ctrl.state) {
> -	case NVME_CTRL_RESETTING:
> -	case NVME_CTRL_CONNECTING:
> -		return false;
> -	default:
> -		break;
> -	}
> -
> -	/* We shouldn't reset unless the controller is on fatal error state
> -	 * _or_ if we lost the communication with it.
> -	 */
> -	if (!(csts & NVME_CSTS_CFS) && !nssro)
> -		return false;
> -
> -	return true;
> -}
> -
>  static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
>  {
>  	/* Read a config register to help see what died. */
> @@ -1355,7 +1330,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>  	/*
>  	 * Reset immediately if the controller is failed
>  	 */
> -	if (nvme_should_reset(dev, csts)) {
> +	if (nvme_should_reset(&dev->ctrl, csts)) {
>  		nvme_warn_reset(dev, csts);
>  		nvme_dev_disable(dev, false);
>  		nvme_reset_ctrl(&dev->ctrl);
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 29ec3e3481ff6..8ced2439f1f34 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -711,6 +711,10 @@ enum {
>  	NVME_AER_VS			= 7,
>  };
> 
> +enum {
> +	NVME_AER_ERROR_PERSIST_INT_ERR	= 0x03,
> +};
> +
>  enum {
>  	NVME_AER_NOTICE_NS_CHANGED	= 0x00,
>  	NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
