linux-kernel - [PATCH 3.13 32/46] powerpc/powernv: Dump PHB diag-data immediately

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20140328173139.045806269@linuxfoundation.org>
Date:	Fri, 28 Mar 2014 10:32:16 -0700
From:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To:	linux-kernel@...r.kernel.org
Cc:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	stable@...r.kernel.org, Gavin Shan <shangw@...ux.vnet.ibm.com>,
	Benjamin Herrenschmidt <benh@...nel.crashing.org>
Subject: [PATCH 3.13 32/46] powerpc/powernv: Dump PHB diag-data immediately

3.13-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Gavin Shan <shangw@...ux.vnet.ibm.com>

commit 947166043732b69878123bf31f51933ad0316080 upstream.

The PHB diag-data is important to help locating the root cause for
EEH errors such as frozen PE or fenced PHB. However, the EEH core
enables IO path by clearing part of HW registers before collecting
this data causing it to be corrupted.

This patch fixes this by dumping the PHB diag-data immediately when
frozen/fenced state on PE or PHB is detected for the first time in
eeh_ops::get_state() or next_error() backend.

Signed-off-by: Gavin Shan <shangw@...ux.vnet.ibm.com>
CC: <stable@...r.kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@...nel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>

---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   99 +++++++++++++-----------------
 1 file changed, 43 insertions(+), 56 deletions(-)

--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -114,6 +114,7 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_db
 			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
 #endif /* CONFIG_DEBUG_FS */
 
+
 /**
  * ioda_eeh_post_init - Chip dependent post initialization
  * @hose: PCI controller
@@ -221,6 +222,22 @@ static int ioda_eeh_set_option(struct ee
 	return ret;
 }
 
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	long rc;
+
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
+	if (rc != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+			    __func__, hose->global_number, rc);
+		return;
+	}
+
+	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
+}
+
 /**
  * ioda_eeh_get_state - Retrieve the state of PE
  * @pe: EEH PE
@@ -272,6 +289,9 @@ static int ioda_eeh_get_state(struct eeh
 			result |= EEH_STATE_DMA_ACTIVE;
 			result |= EEH_STATE_MMIO_ENABLED;
 			result |= EEH_STATE_DMA_ENABLED;
+		} else if (!(pe->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
 		}
 
 		return result;
@@ -315,6 +335,15 @@ static int ioda_eeh_get_state(struct eeh
 			   __func__, fstate, hose->global_number, pe_no);
 	}
 
+	/* Dump PHB diag-data for frozen PE */
+	if (result != EEH_STATE_NOT_SUPPORT &&
+	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(hose);
+	}
+
 	return result;
 }
 
@@ -530,45 +559,6 @@ static int ioda_eeh_reset(struct eeh_pe
 }
 
 /**
- * ioda_eeh_get_log - Retrieve error log
- * @pe: EEH PE
- * @severity: Severity level of the log
- * @drv_log: buffer to store the log
- * @len: space of the log buffer
- *
- * The function is used to retrieve error log from P7IOC.
- */
-static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
-			    char *drv_log, unsigned long len)
-{
-	s64 ret;
-	unsigned long flags;
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	ret = opal_pci_get_phb_diag_data2(phb->opal_id,
-			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
-	if (ret) {
-		spin_unlock_irqrestore(&phb->lock, flags);
-		pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
-			   __func__, hose->global_number, pe->addr, ret);
-		return -EIO;
-	}
-
-	/*
-	 * FIXME: We probably need log the error in somewhere.
-	 * Lets make it up in future.
-	 */
-	/* pr_info("%s", phb->diag.blob); */
-
-	spin_unlock_irqrestore(&phb->lock, flags);
-
-	return 0;
-}
-
-/**
  * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
  * @pe: EEH PE
  *
@@ -649,22 +639,6 @@ static void ioda_eeh_hub_diag(struct pci
 	}
 }
 
-static void ioda_eeh_phb_diag(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	long rc;
-
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
-					 PNV_PCI_DIAG_BUF_SIZE);
-	if (rc != OPAL_SUCCESS) {
-		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			    __func__, hose->global_number, rc);
-		return;
-	}
-
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-}
-
 static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
 			       struct eeh_pe **pe)
 {
@@ -827,6 +801,20 @@ static int ioda_eeh_next_error(struct ee
 		}
 
 		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
+		}
+
+		/*
 		 * If we have no errors on the specific PHB or only
 		 * informative error there, we continue poking it.
 		 * Otherwise, we need actions to be taken by upper
@@ -844,7 +832,6 @@ struct pnv_eeh_ops ioda_eeh_ops = {
 	.set_option		= ioda_eeh_set_option,
 	.get_state		= ioda_eeh_get_state,
 	.reset			= ioda_eeh_reset,
-	.get_log		= ioda_eeh_get_log,
 	.configure_bridge	= ioda_eeh_configure_bridge,
 	.next_error		= ioda_eeh_next_error
 };


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/