lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 26 Feb 2018 11:02:50 +0530
From:   poza@...eaurora.org
To:     Bjorn Helgaas <helgaas@...nel.org>
Cc:     Bjorn Helgaas <bhelgaas@...gle.com>,
        Philippe Ombredanne <pombredanne@...b.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Kate Stewart <kstewart@...uxfoundation.org>,
        linux-pci@...r.kernel.org, linux-kernel@...r.kernel.org,
        Dongdong Liu <liudongdong3@...wei.com>,
        Keith Busch <keith.busch@...el.com>, Wei Zhang <wzhang@...com>,
        Sinan Kaya <okaya@...eaurora.org>,
        Timur Tabi <timur@...eaurora.org>
Subject: Re: [PATCH v11 2/7] PCI/AER: factor out error reporting from AER

On 2018-02-24 05:12, Bjorn Helgaas wrote:
> On Fri, Feb 23, 2018 at 01:53:59PM +0530, Oza Pawandeep wrote:
>> This patch factors out error reporting callbacks, which are currently
>> tightly coupled with AER.
> 
> Add blank line between paragraphs.
> 
>> DPC should be able to register callbacks and attmept recovery when DPC
>> trigger event occurs.
> 
> s/attmept/attempt/
> 
> This patch basically moves code from aerdrv_core.c to pcie-err.c.  Make 
> it
> do *only* that.
> 

sure.

>> Signed-off-by: Oza Pawandeep <poza@...eaurora.org>
>> 
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index fcd8191..abc514e 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -342,6 +342,9 @@ static inline resource_size_t 
>> pci_resource_alignment(struct pci_dev *dev,
>> 
>>  void pci_enable_acs(struct pci_dev *dev);
>> 
>> +/* PCI error reporting and recovery */
>> +void pcie_do_recovery(struct pci_dev *dev, int severity);
> 
> Add this declaration in the first patch, the one where you rename the
> function.
> 

done.

>>  #ifdef CONFIG_PCIEASPM
>>  void pcie_aspm_init_link_state(struct pci_dev *pdev);
>>  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>> diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
>> index 223e4c3..d669497 100644
>> --- a/drivers/pci/pcie/Makefile
>> +++ b/drivers/pci/pcie/Makefile
>> @@ -6,7 +6,7 @@
>>  # Build PCI Express ASPM if needed
>>  obj-$(CONFIG_PCIEASPM)		+= aspm.o
>> 
>> -pcieportdrv-y			:= portdrv_core.o portdrv_pci.o portdrv_bus.o
>> +pcieportdrv-y			:= portdrv_core.o portdrv_pci.o portdrv_bus.o 
>> pcie-err.o
> 
> Can we name this just "drivers/pci/pcie/err.c"?  I know we have
> pcie-dpc.c already, but it does get a little repetitious to type
> "pci" THREE times in that path.
> 

sure, will rename.

>>  pcieportdrv-$(CONFIG_ACPI)	+= portdrv_acpi.o
>> 
>>  obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
>> diff --git a/drivers/pci/pcie/aer/aerdrv.h 
>> b/drivers/pci/pcie/aer/aerdrv.h
>> index 5449e5c..bc9db53 100644
>> --- a/drivers/pci/pcie/aer/aerdrv.h
>> +++ b/drivers/pci/pcie/aer/aerdrv.h
>> @@ -76,36 +76,6 @@ struct aer_rpc {
>>  					 */
>>  };
>> 
>> -struct aer_broadcast_data {
>> -	enum pci_channel_state state;
>> -	enum pci_ers_result result;
>> -};
>> -
>> -static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
>> -		enum pci_ers_result new)
>> -{
>> -	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
>> -		return PCI_ERS_RESULT_NO_AER_DRIVER;
>> -
>> -	if (new == PCI_ERS_RESULT_NONE)
>> -		return orig;
>> -
>> -	switch (orig) {
>> -	case PCI_ERS_RESULT_CAN_RECOVER:
>> -	case PCI_ERS_RESULT_RECOVERED:
>> -		orig = new;
>> -		break;
>> -	case PCI_ERS_RESULT_DISCONNECT:
>> -		if (new == PCI_ERS_RESULT_NEED_RESET)
>> -			orig = PCI_ERS_RESULT_NEED_RESET;
>> -		break;
>> -	default:
>> -		break;
>> -	}
>> -
>> -	return orig;
>> -}
>> -
>>  extern struct bus_type pcie_port_bus_type;
>>  void aer_isr(struct work_struct *work);
>>  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>> diff --git a/drivers/pci/pcie/aer/aerdrv_core.c 
>> b/drivers/pci/pcie/aer/aerdrv_core.c
>> index aeb83a0..f60b1bb 100644
>> --- a/drivers/pci/pcie/aer/aerdrv_core.c
>> +++ b/drivers/pci/pcie/aer/aerdrv_core.c
>> @@ -23,6 +23,7 @@
>>  #include <linux/slab.h>
>>  #include <linux/kfifo.h>
>>  #include "aerdrv.h"
>> +#include "../../pci.h"
>> 
>>  #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE 
>> | \
>>  				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
>> @@ -230,191 +231,6 @@ static bool find_source_device(struct pci_dev 
>> *parent,
>>  	return true;
>>  }
>> 
>> -static int report_error_detected(struct pci_dev *dev, void *data)
>> -{
>> -	pci_ers_result_t vote;
>> -	const struct pci_error_handlers *err_handler;
>> -	struct aer_broadcast_data *result_data;
>> -	result_data = (struct aer_broadcast_data *) data;
>> -
>> -	device_lock(&dev->dev);
>> -	dev->error_state = result_data->state;
>> -
>> -	if (!dev->driver ||
>> -		!dev->driver->err_handler ||
>> -		!dev->driver->err_handler->error_detected) {
>> -		if (result_data->state == pci_channel_io_frozen &&
>> -			dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
>> -			/*
>> -			 * In case of fatal recovery, if one of down-
>> -			 * stream device has no driver. We might be
>> -			 * unable to recover because a later insmod
>> -			 * of a driver for this device is unaware of
>> -			 * its hw state.
>> -			 */
>> -			pci_printk(KERN_DEBUG, dev, "device has %s\n",
>> -				   dev->driver ?
>> -				   "no AER-aware driver" : "no driver");
>> -		}
>> -
>> -		/*
>> -		 * If there's any device in the subtree that does not
>> -		 * have an error_detected callback, returning
>> -		 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
>> -		 * the subsequent mmio_enabled/slot_reset/resume
>> -		 * callbacks of "any" device in the subtree. All the
>> -		 * devices in the subtree are left in the error state
>> -		 * without recovery.
>> -		 */
>> -
>> -		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
>> -			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
>> -		else
>> -			vote = PCI_ERS_RESULT_NONE;
>> -	} else {
>> -		err_handler = dev->driver->err_handler;
>> -		vote = err_handler->error_detected(dev, result_data->state);
>> -		pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
>> -	}
>> -
>> -	result_data->result = merge_result(result_data->result, vote);
>> -	device_unlock(&dev->dev);
>> -	return 0;
>> -}
>> -
>> -static int report_mmio_enabled(struct pci_dev *dev, void *data)
>> -{
>> -	pci_ers_result_t vote;
>> -	const struct pci_error_handlers *err_handler;
>> -	struct aer_broadcast_data *result_data;
>> -	result_data = (struct aer_broadcast_data *) data;
>> -
>> -	device_lock(&dev->dev);
>> -	if (!dev->driver ||
>> -		!dev->driver->err_handler ||
>> -		!dev->driver->err_handler->mmio_enabled)
>> -		goto out;
>> -
>> -	err_handler = dev->driver->err_handler;
>> -	vote = err_handler->mmio_enabled(dev);
>> -	result_data->result = merge_result(result_data->result, vote);
>> -out:
>> -	device_unlock(&dev->dev);
>> -	return 0;
>> -}
>> -
>> -static int report_slot_reset(struct pci_dev *dev, void *data)
>> -{
>> -	pci_ers_result_t vote;
>> -	const struct pci_error_handlers *err_handler;
>> -	struct aer_broadcast_data *result_data;
>> -	result_data = (struct aer_broadcast_data *) data;
>> -
>> -	device_lock(&dev->dev);
>> -	if (!dev->driver ||
>> -		!dev->driver->err_handler ||
>> -		!dev->driver->err_handler->slot_reset)
>> -		goto out;
>> -
>> -	err_handler = dev->driver->err_handler;
>> -	vote = err_handler->slot_reset(dev);
>> -	result_data->result = merge_result(result_data->result, vote);
>> -out:
>> -	device_unlock(&dev->dev);
>> -	return 0;
>> -}
>> -
>> -static int report_resume(struct pci_dev *dev, void *data)
>> -{
>> -	const struct pci_error_handlers *err_handler;
>> -
>> -	device_lock(&dev->dev);
>> -	dev->error_state = pci_channel_io_normal;
>> -
>> -	if (!dev->driver ||
>> -		!dev->driver->err_handler ||
>> -		!dev->driver->err_handler->resume)
>> -		goto out;
>> -
>> -	err_handler = dev->driver->err_handler;
>> -	err_handler->resume(dev);
>> -	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
>> -out:
>> -	device_unlock(&dev->dev);
>> -	return 0;
>> -}
>> -
>> -/**
>> - * broadcast_error_message - handle message broadcast to downstream 
>> drivers
>> - * @dev: pointer to from where in a hierarchy message is broadcasted 
>> down
>> - * @state: error state
>> - * @error_mesg: message to print
>> - * @cb: callback to be broadcasted
>> - *
>> - * Invoked during error recovery process. Once being invoked, the 
>> content
>> - * of error severity will be broadcasted to all downstream drivers in 
>> a
>> - * hierarchy in question.
>> - */
>> -static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
>> -	enum pci_channel_state state,
>> -	char *error_mesg,
>> -	int (*cb)(struct pci_dev *, void *))
>> -{
>> -	struct aer_broadcast_data result_data;
>> -
>> -	pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
>> -	result_data.state = state;
>> -	if (cb == report_error_detected)
>> -		result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
>> -	else
>> -		result_data.result = PCI_ERS_RESULT_RECOVERED;
>> -
>> -	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
>> -		/*
>> -		 * If the error is reported by a bridge, we think this error
>> -		 * is related to the downstream link of the bridge, so we
>> -		 * do error recovery on all subordinates of the bridge instead
>> -		 * of the bridge and clear the error status of the bridge.
>> -		 */
>> -		if (cb == report_error_detected)
>> -			dev->error_state = state;
>> -		pci_walk_bus(dev->subordinate, cb, &result_data);
>> -		if (cb == report_resume) {
>> -			pci_cleanup_aer_uncorrect_error_status(dev);
>> -			dev->error_state = pci_channel_io_normal;
>> -		}
>> -	} else {
>> -		/*
>> -		 * If the error is reported by an end point, we think this
>> -		 * error is related to the upstream link of the end point.
>> -		 */
>> -		if (state == pci_channel_io_normal)
>> -			/*
>> -			 * the error is non fatal so the bus is ok, just invoke
>> -			 * the callback for the function that logged the error.
>> -			 */
>> -			cb(dev, &result_data);
>> -		else
>> -			pci_walk_bus(dev->bus, cb, &result_data);
>> -	}
>> -
>> -	return result_data.result;
>> -}
>> -
>> -/**
>> - * default_reset_link - default reset function
>> - * @dev: pointer to pci_dev data structure
>> - *
>> - * Invoked when performing link reset on a Downstream Port or a
>> - * Root Port with no aer driver.
>> - */
>> -static pci_ers_result_t default_reset_link(struct pci_dev *dev)
>> -{
>> -	pci_reset_bridge_secondary_bus(dev);
>> -	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
>> -	return PCI_ERS_RESULT_RECOVERED;
>> -}
>> -
>>  static int find_aer_service_iter(struct device *device, void *data)
>>  {
>>  	struct pcie_port_service_driver *service_driver, **drv;
>> @@ -432,7 +248,7 @@ static int find_aer_service_iter(struct device 
>> *device, void *data)
>>  	return 0;
>>  }
>> 
>> -static struct pcie_port_service_driver *find_aer_service(struct 
>> pci_dev *dev)
>> +struct pcie_port_service_driver *pci_find_aer_service(struct pci_dev 
>> *dev)
> 
> Move this rename to a different patch.  The new name should probably
> start with "pcie" like you did with pcie_do_recovery().
> 

sure, will do that.

>>  {
>>  	struct pcie_port_service_driver *drv = NULL;
>> 
>> @@ -440,107 +256,7 @@ static struct pcie_port_service_driver 
>> *find_aer_service(struct pci_dev *dev)
>> 
>>  	return drv;
>>  }
>> -
>> -static pci_ers_result_t reset_link(struct pci_dev *dev)
>> -{
>> -	struct pci_dev *udev;
>> -	pci_ers_result_t status;
>> -	struct pcie_port_service_driver *driver;
>> -
>> -	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
>> -		/* Reset this port for all subordinates */
>> -		udev = dev;
>> -	} else {
>> -		/* Reset the upstream component (likely downstream port) */
>> -		udev = dev->bus->self;
>> -	}
>> -
>> -	/* Use the aer driver of the component firstly */
>> -	driver = find_aer_service(udev);
>> -
>> -	if (driver && driver->reset_link) {
>> -		status = driver->reset_link(udev);
>> -	} else if (udev->has_secondary_link) {
>> -		status = default_reset_link(udev);
>> -	} else {
>> -		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream 
>> device %s\n",
>> -			pci_name(udev));
>> -		return PCI_ERS_RESULT_DISCONNECT;
>> -	}
>> -
>> -	if (status != PCI_ERS_RESULT_RECOVERED) {
>> -		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s 
>> failed\n",
>> -			pci_name(udev));
>> -		return PCI_ERS_RESULT_DISCONNECT;
>> -	}
>> -
>> -	return status;
>> -}
>> -
>> -/**
>> - * pcie_do_recovery - handle nonfatal/fatal error recovery process
>> - * @dev: pointer to a pci_dev data structure of agent detecting an 
>> error
>> - * @severity: error severity type
>> - *
>> - * Invoked when an error is nonfatal/fatal. Once being invoked, 
>> broadcast
>> - * error detected message to all downstream drivers within a 
>> hierarchy in
>> - * question and return the returned code.
>> - */
>> -static void pcie_do_recovery(struct pci_dev *dev, int severity)
>> -{
>> -	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
>> -	enum pci_channel_state state;
>> -
>> -	if (severity == AER_FATAL)
>> -		state = pci_channel_io_frozen;
>> -	else
>> -		state = pci_channel_io_normal;
>> -
>> -	status = broadcast_error_message(dev,
>> -			state,
>> -			"error_detected",
>> -			report_error_detected);
>> -
>> -	if (severity == AER_FATAL) {
>> -		result = reset_link(dev);
>> -		if (result != PCI_ERS_RESULT_RECOVERED)
>> -			goto failed;
>> -	}
>> -
>> -	if (status == PCI_ERS_RESULT_CAN_RECOVER)
>> -		status = broadcast_error_message(dev,
>> -				state,
>> -				"mmio_enabled",
>> -				report_mmio_enabled);
>> -
>> -	if (status == PCI_ERS_RESULT_NEED_RESET) {
>> -		/*
>> -		 * TODO: Should call platform-specific
>> -		 * functions to reset slot before calling
>> -		 * drivers' slot_reset callbacks?
>> -		 */
>> -		status = broadcast_error_message(dev,
>> -				state,
>> -				"slot_reset",
>> -				report_slot_reset);
>> -	}
>> -
>> -	if (status != PCI_ERS_RESULT_RECOVERED)
>> -		goto failed;
>> -
>> -	broadcast_error_message(dev,
>> -				state,
>> -				"resume",
>> -				report_resume);
>> -
>> -	pci_info(dev, "AER: Device recovery successful\n");
>> -	return;
>> -
>> -failed:
>> -	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
>> -	/* TODO: Should kernel panic here? */
>> -	pci_info(dev, "AER: Device recovery failed\n");
>> -}
>> +EXPORT_SYMBOL_GPL(pci_find_aer_service);
> 
> This is never called from a module, so I don't think you need to
> export it at all.  If you do, it should be a separate patch, not
> buried in the middle of this big one.
> 

got it, will see if its really required to be exported.
but certainly, will remove it from this patch.

>>  /**
>>   * handle_error_source - handle logging error into an event log
>> diff --git a/drivers/pci/pcie/pcie-err.c b/drivers/pci/pcie/pcie-err.c
>> new file mode 100644
>> index 0000000..fcd5add
>> --- /dev/null
>> +++ b/drivers/pci/pcie/pcie-err.c
>> @@ -0,0 +1,334 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * This file implements the error recovery as a core part of PCIe 
>> error reporting.
>> + * When a PCIe error is delivered, an error message will be collected 
>> and printed
>> + * to console, then, an error recovery procedure will be executed by 
>> following
>> + * the PCI error recovery rules.
> 
> Wrap this so it fits in 80 columns.

I thought of keeping the way it was before (hence did not change it)
I would change it now.

> 
>> + * Copyright (C) 2006 Intel Corp.
>> + *	Tom Long Nguyen (tom.l.nguyen@...el.com)
>> + *	Zhang Yanmin (yanmin.zhang@...el.com)
>> + *
>> + */
>> +
>> +#include <linux/pci.h>
>> +#include <linux/module.h>
>> +#include <linux/pci.h>
>> +#include <linux/kernel.h>
>> +#include <linux/errno.h>
>> +#include <linux/aer.h>
>> +#include <linux/pcieport_if.h>
>> +#include "portdrv.h"
>> +
>> +struct aer_broadcast_data {
>> +	enum pci_channel_state state;
>> +	enum pci_ers_result result;
>> +};
>> +
>> +static pci_ers_result_t merge_result(enum pci_ers_result orig,
>> +				  enum pci_ers_result new)
>> +{
>> +	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
>> +		return PCI_ERS_RESULT_NO_AER_DRIVER;
>> +
>> +	if (new == PCI_ERS_RESULT_NONE)
>> +		return orig;
>> +
>> +	switch (orig) {
>> +	case PCI_ERS_RESULT_CAN_RECOVER:
>> +	case PCI_ERS_RESULT_RECOVERED:
>> +		orig = new;
>> +		break;
>> +	case PCI_ERS_RESULT_DISCONNECT:
>> +		if (new == PCI_ERS_RESULT_NEED_RESET)
>> +			orig = PCI_ERS_RESULT_NEED_RESET;
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	return orig;
>> +}
>> +
>> +static int report_mmio_enabled(struct pci_dev *dev, void *data)
>> +{
>> +	pci_ers_result_t vote;
>> +	const struct pci_error_handlers *err_handler;
>> +	struct aer_broadcast_data *result_data;
>> +
>> +	result_data = (struct aer_broadcast_data *) data;
>> +
>> +	device_lock(&dev->dev);
>> +	if (!dev->driver ||
>> +		!dev->driver->err_handler ||
>> +		!dev->driver->err_handler->mmio_enabled)
>> +		goto out;
>> +
>> +	err_handler = dev->driver->err_handler;
>> +	vote = err_handler->mmio_enabled(dev);
>> +	result_data->result = merge_result(result_data->result, vote);
>> +out:
>> +	device_unlock(&dev->dev);
>> +	return 0;
>> +}
>> +
>> +static int report_slot_reset(struct pci_dev *dev, void *data)
>> +{
>> +	pci_ers_result_t vote;
>> +	const struct pci_error_handlers *err_handler;
>> +	struct aer_broadcast_data *result_data;
>> +
>> +	result_data = (struct aer_broadcast_data *) data;
>> +
>> +	device_lock(&dev->dev);
>> +	if (!dev->driver ||
>> +		!dev->driver->err_handler ||
>> +		!dev->driver->err_handler->slot_reset)
>> +		goto out;
>> +
>> +	err_handler = dev->driver->err_handler;
>> +	vote = err_handler->slot_reset(dev);
>> +	result_data->result = merge_result(result_data->result, vote);
>> +out:
>> +	device_unlock(&dev->dev);
>> +	return 0;
>> +}
>> +
>> +static int report_resume(struct pci_dev *dev, void *data)
>> +{
>> +	const struct pci_error_handlers *err_handler;
>> +
>> +	device_lock(&dev->dev);
>> +	dev->error_state = pci_channel_io_normal;
>> +
>> +	if (!dev->driver ||
>> +		!dev->driver->err_handler ||
>> +		!dev->driver->err_handler->resume)
>> +		goto out;
>> +
>> +	err_handler = dev->driver->err_handler;
>> +	err_handler->resume(dev);
>> +out:
>> +	device_unlock(&dev->dev);
>> +	return 0;
>> +}
>> +
>> +static int report_error_detected(struct pci_dev *dev, void *data)
>> +{
>> +	pci_ers_result_t vote;
>> +	const struct pci_error_handlers *err_handler;
>> +	struct aer_broadcast_data *result_data;
>> +
>> +	result_data = (struct aer_broadcast_data *) data;
>> +
>> +	device_lock(&dev->dev);
>> +	dev->error_state = result_data->state;
>> +
>> +	if (!dev->driver ||
>> +		!dev->driver->err_handler ||
>> +		!dev->driver->err_handler->error_detected) {
>> +		if (result_data->state == pci_channel_io_frozen &&
>> +			dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
>> +			/*
>> +			 * In case of fatal recovery, if one of down-
>> +			 * stream device has no driver. We might be
>> +			 * unable to recover because a later insmod
>> +			 * of a driver for this device is unaware of
>> +			 * its hw state.
>> +			 */
>> +			dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
>> +				   dev->driver ?
>> +				   "no error-aware driver" : "no driver");
> 
> This was a pci_printk() before you moved it and it should be the same 
> here.
> 

sure, will correct this.

>> +		}
>> +
>> +		/*
>> +		 * If there's any device in the subtree that does not
>> +		 * have an error_detected callback, returning
>> +		 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
>> +		 * the subsequent mmio_enabled/slot_reset/resume
>> +		 * callbacks of "any" device in the subtree. All the
>> +		 * devices in the subtree are left in the error state
>> +		 * without recovery.
>> +		 */
>> +
>> +		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
>> +			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
>> +		else
>> +			vote = PCI_ERS_RESULT_NONE;
>> +	} else {
>> +		err_handler = dev->driver->err_handler;
>> +		vote = err_handler->error_detected(dev, result_data->state);
>> +	}
>> +
>> +	result_data->result = merge_result(result_data->result, vote);
>> +	device_unlock(&dev->dev);
>> +	return 0;
>> +}
>> +
>> +/**
>> + * default_reset_link - default reset function
>> + * @dev: pointer to pci_dev data structure
>> + *
>> + * Invoked when performing link reset on a Downstream Port or a
>> + * Root Port with no aer driver.
>> + */
>> +static pci_ers_result_t default_reset_link(struct pci_dev *dev)
>> +{
>> +	pci_reset_bridge_secondary_bus(dev);
>> +	dev_printk(KERN_DEBUG, &dev->dev, "downstream link has been 
>> reset\n");
> 
> This should be a pci_printk() as it was before the move.  There are 
> more
> below.
> 

yes all will be corrected.
thanks.

>> +	return PCI_ERS_RESULT_RECOVERED;
>> +}
>> +
>> +static pci_ers_result_t reset_link(struct pci_dev *dev)
>> +{
>> +	struct pci_dev *udev;
>> +	pci_ers_result_t status;
>> +	struct pcie_port_service_driver *driver = NULL;
>> +
>> +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
>> +		/* Reset this port for all subordinates */
>> +		udev = dev;
>> +	} else {
>> +		/* Reset the upstream component (likely downstream port) */
>> +		udev = dev->bus->self;
>> +	}
>> +
>> +#if IS_ENABLED(CONFIG_PCIEAER)
> 
> AER can't be a module, so you can use just:
> 
>   #ifdef CONFIG_PCIEAER
> 
> This ifdef should be added in the patch where you add a caller from 
> non-AER
> code.  This patch should only move code, not change it.

ok, it can remain unchanged. but reset_link() is called by 
pcie_do_recovery()
and pcie_do_recovery can be called by various agents such as AER, DPC.
so let us say if DPC calls pcie_do_recovery, then DPC has no way of 
knowing that AER is enabled or not.
in fact it should not know, but err.c/reset_link() should take care 
somehow.

I can make it a separate patch to do #ifdef CONFIG_PCIEAER inside 
reset_link()
or
I can add severity parameter in reset_link() so based on severity it can 
find the service.

but I think you have comment to unify the find_aer_service and 
find_dpc_service into a pcie_find_service (routine)
so I will see how I can club and take care of this comment. [without the 
need of #ifdef]

> 
>> +	/* Use the aer driver of the component firstly */
>> +	driver = pci_find_aer_service(udev);
>> +#endif
>> +
>> +	if (driver && driver->reset_link) {
>> +		status = driver->reset_link(udev);
>> +	} else if (udev->has_secondary_link) {
>> +		status = default_reset_link(udev);
>> +	} else {
>> +		dev_printk(KERN_DEBUG, &dev->dev,
>> +			"no link-reset support at upstream device %s\n",
>> +			pci_name(udev));
>> +		return PCI_ERS_RESULT_DISCONNECT;
>> +	}
>> +
>> +	if (status != PCI_ERS_RESULT_RECOVERED) {
>> +		dev_printk(KERN_DEBUG, &dev->dev,
>> +			"link reset at upstream device %s failed\n",
>> +			pci_name(udev));
>> +		return PCI_ERS_RESULT_DISCONNECT;
>> +	}
>> +
>> +	return status;
>> +}
>> +
>> +/**
>> + * broadcast_error_message - handle message broadcast to downstream 
>> drivers
>> + * @dev: pointer to where in a hierarchy message is broadcasted down
>> + * @state: error state
>> + * @error_mesg: message to print
>> + * @cb: callback to be broadcast
>> + *
>> + * Invoked during error recovery process. Once being invoked, the 
>> content
>> + * of error severity will be broadcast to all downstream drivers in a
>> + * hierarchy in question.
>> + */
>> +static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
>> +	enum pci_channel_state state,
>> +	char *error_mesg,
>> +	int (*cb)(struct pci_dev *, void *))
>> +{
>> +	struct aer_broadcast_data result_data;
>> +
>> +	dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", 
>> error_mesg);
>> +	result_data.state = state;
>> +	if (cb == report_error_detected)
>> +		result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
>> +	else
>> +		result_data.result = PCI_ERS_RESULT_RECOVERED;
>> +
>> +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
>> +		/*
>> +		 * If the error is reported by a bridge, we think this error
>> +		 * is related to the downstream link of the bridge, so we
>> +		 * do error recovery on all subordinates of the bridge instead
>> +		 * of the bridge and clear the error status of the bridge.
>> +		 */
>> +		if (cb == report_error_detected)
>> +			dev->error_state = state;
>> +		pci_walk_bus(dev->subordinate, cb, &result_data);
>> +		if (cb == report_resume) {
>> +			pci_cleanup_aer_uncorrect_error_status(dev);
>> +			dev->error_state = pci_channel_io_normal;
>> +		}
>> +	} else {
>> +		/*
>> +		 * If the error is reported by an end point, we think this
>> +		 * error is related to the upstream link of the end point.
>> +		 */
>> +		pci_walk_bus(dev->bus, cb, &result_data);
>> +	}
>> +
>> +	return result_data.result;
>> +}
>> +
>> +/**
>> + * pcie_do_recovery - handle nonfatal/fatal error recovery process
>> + * @dev: pointer to a pci_dev data structure of agent detecting an 
>> error
>> + * @severity: error severity type
>> + *
>> + * Invoked when an error is nonfatal/fatal. Once being invoked, 
>> broadcast
>> + * error detected message to all downstream drivers within a 
>> hierarchy in
>> + * question and return the returned code.
>> + */
>> +void pcie_do_recovery(struct pci_dev *dev, int severity)
>> +{
>> +	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
>> +	enum pci_channel_state state;
>> +
>> +	if (severity == AER_FATAL)
>> +		state = pci_channel_io_frozen;
>> +	else
>> +		state = pci_channel_io_normal;
>> +
>> +	status = broadcast_error_message(dev,
>> +			state,
>> +			"error_detected",
>> +			report_error_detected);
>> +
>> +	if (severity == AER_FATAL) {
>> +		result = reset_link(dev);
>> +		if (result != PCI_ERS_RESULT_RECOVERED)
>> +			goto failed;
>> +	}
>> +
>> +	if (status == PCI_ERS_RESULT_CAN_RECOVER)
>> +		status = broadcast_error_message(dev,
>> +				state,
>> +				"mmio_enabled",
>> +				report_mmio_enabled);
>> +
>> +	if (status == PCI_ERS_RESULT_NEED_RESET) {
>> +		/*
>> +		 * TODO: Should call platform-specific
>> +		 * functions to reset slot before calling
>> +		 * drivers' slot_reset callbacks?
>> +		 */
>> +		status = broadcast_error_message(dev,
>> +				state,
>> +				"slot_reset",
>> +				report_slot_reset);
>> +	}
>> +
>> +	if (status != PCI_ERS_RESULT_RECOVERED)
>> +		goto failed;
>> +
>> +	broadcast_error_message(dev,
>> +				state,
>> +				"resume",
>> +				report_resume);
>> +
>> +	dev_info(&dev->dev, "Device recovery successful\n");
>> +	return;
>> +
>> +failed:
>> +	/* TODO: Should kernel panic here? */
>> +	dev_info(&dev->dev, "Device recovery failed\n");
>> +}
>> diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
>> index a854bc5..4f1992d 100644
>> --- a/drivers/pci/pcie/portdrv.h
>> +++ b/drivers/pci/pcie/portdrv.h
>> @@ -79,4 +79,5 @@ static inline void pcie_port_platform_notify(struct 
>> pci_dev *port, int *mask)
>>  static inline void pcie_port_platform_notify(struct pci_dev *port, 
>> int *mask){}
>>  #endif /* !CONFIG_ACPI */
>> 
>> +struct pcie_port_service_driver *pci_find_aer_service(struct pci_dev 
>> *dev);
> 
> Should be in a different patch, maybe the one where you rename it.
> 
>>  #endif /* _PORTDRV_H_ */
>> --
>> Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
>> Technologies, Inc.,
>> a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a 
>> Linux Foundation Collaborative Project.
>> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ