linux-kernel - Fwd: cciss: PCI power management reset for kexec

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <fa4a66a6-0729-41e4-9cd5-5380a7a0058f@d36g2000prf.googlegroups.com>
Date:	Mon, 9 Feb 2009 08:20:36 -0800 (PST)
From:	mikem1355@...il.com
To:	akpm@...ux-foundation.org, jens.axboe@...cle.com,
	randy.dunlap@...cle.com
Cc:	linux-kernel@...r.kernel.org, linux-scsi@...r.kernel.org
Subject: Fwd: cciss: PCI power management reset for kexec




---------- Forwarded message ----------
From: Mike Miller <mike.mil...@...com>
Date: Feb 7, 1:40 pm
Subject: cciss: PCI power management reset for kexec
To: linux.kernel


Patch 1 of 1

This patch provides the better "kick-in-the-pants" on driver load in a
kexec'ed environment.

I've successfully sanity tested the port in my lab. Randy, please
apply and
test. You seem to be able to bring out the worst in the driver. ;-)

Author: Chip Coldwell <coldw...@...hat.com>

   CCISS: Use PCI power management to reset the controller

    The kexec kernel resets theCCISShardware in three steps:

    1. Use PCI power management states to reset the controller
       in the kexec kernel.
    2. Clear the MSI/MSI-X bits in PCI configuration space so
       that MSI initialization in the kexec kernel doesn't fail.
    3. Use theCCISS"No-op" message to determine when the
       controller firmware has recovered from the PCI PM reset.

Signed-off-by: Mike Miller <mike.mil...@...com>

I should have given more detail about using this reset patch. You must
use the kernel command line option "reset_devices" in the kexec
command line. My testing used:

# kexec -l /boot/vmlinuz --initrd=/boot/initrd.img --append="root=/dev/
cciss/c0d0p2 reset_devices 3 maxcpus=1 irqpoll"

then

# kexec -e

Please consider this for inclusion.

-------------------------------------------------------------------------------
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 01e6938..ff4a105 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3390,6 +3390,205 @@ static void free_hba(int i)
        kfree(p);
 }

+/* Send a message CDB to the firmware. */
+static __devinit int cciss_message(struct pci_dev *pdev, unsigned
char opcode, unsigned char type)
+{
+       typedef struct {
+               CommandListHeader_struct CommandHeader;
+               RequestBlock_struct Request;
+               ErrDescriptor_struct ErrorDescriptor;
+       } Command;
+       static const size_t cmd_sz = sizeof(Command) + sizeof
(ErrorInfo_struct);
+       Command *cmd;
+       dma_addr_t paddr64;
+       uint32_t paddr32, tag;
+       void __iomem *vaddr;
+       int i, err;
+
+       vaddr = ioremap_nocache(pci_resource_start(pdev, 0),
pci_resource_len(pdev, 0));
+       if (vaddr == NULL)
+               return -ENOMEM;
+
+       /* The Inbound Post Queue only accepts 32-bit physical
addresses for the
+          CCISScommands, so they must be allocated from the lower
4GiB of
+          memory. */
+       err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+       if (err) {
+               iounmap(vaddr);
+               return -ENOMEM;
+       }
+
+       cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
+       if (cmd == NULL) {
+               iounmap(vaddr);
+               return -ENOMEM;
+       }
+
+       /* This must fit, because of the 32-bit consistent DMA mask.
Also,
+          although there's no guarantee, we assume that the address
is at
+          least 4-byte aligned (most likely, it's page-aligned). */
+       paddr32 = paddr64;
+
+       cmd->CommandHeader.ReplyQueue = 0;
+       cmd->CommandHeader.SGList = 0;
+       cmd->CommandHeader.SGTotal = 0;
+       cmd->CommandHeader.Tag.lower = paddr32;
+       cmd->CommandHeader.Tag.upper = 0;
+       memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
+
+       cmd->Request.CDBLen = 16;
+       cmd->Request.Type.Type = TYPE_MSG;
+       cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
+       cmd->Request.Type.Direction = XFER_NONE;
+       cmd->Request.Timeout = 0; /* Don't time out */
+       cmd->Request.CDB[0] = opcode;
+       cmd->Request.CDB[1] = type;
+       memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is
reserved */
+
+       cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
+       cmd->ErrorDescriptor.Addr.upper = 0;
+       cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
+
+       writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
+
+       for (i = 0; i < 10; i++) {
+               tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
+               if ((tag & ~3) == paddr32)
+                       break;
+               schedule_timeout_uninterruptible(HZ);
+       }
+
+       iounmap(vaddr);
+
+       /* we leak the DMA buffer here ... no choice since the
controller could
+          still complete the command. */
+       if (i == 10) {
+               printk(KERN_ERR "cciss: controller message %02x:%02x
timed out\n",
+                       opcode, type);
+               return -ETIMEDOUT;
+       }
+
+       pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
+
+       if (tag & 2) {
+               printk(KERN_ERR "cciss: controller message %02x:%02x
failed\n",
+                       opcode, type);
+               return -EIO;
+       }
+
+       printk(KERN_INFO "cciss: controller message %02x:%02x succeeded
\n",
+               opcode, type);
+       return 0;
+}
+
+#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
+#define cciss_noop(p) cciss_message(p, 3, 0)
+
+static __devinit int cciss_reset_msi(struct pci_dev *pdev)
+{
+/* the #defines are stolen from drivers/pci/msi.h. */
+#define msi_control_reg(base)          (base + PCI_MSI_FLAGS)
+#define PCI_MSIX_FLAGS_ENABLE          (1 << 15)
+
+       int pos;
+       u16 control = 0;
+
+       pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+       if (pos) {
+               pci_read_config_word(pdev, msi_control_reg(pos),
&control);
+               if (control & PCI_MSI_FLAGS_ENABLE) {
+                       printk(KERN_INFO "cciss: resetting MSI\n");
+                       pci_write_config_word(pdev, msi_control_reg
(pos), control & ~PCI_MSI_FLAGS_ENABLE);
+               }
+       }
+
+       pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+       if (pos) {
+               pci_read_config_word(pdev, msi_control_reg(pos),
&control);
+               if (control & PCI_MSIX_FLAGS_ENABLE) {
+                       printk(KERN_INFO "cciss: resetting MSI-X\n");
+                       pci_write_config_word(pdev, msi_control_reg
(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
+               }
+       }
+
+       return 0;
+}
+
+/* This does a hard reset of the controller using PCI power
management
+ * states. */
+static __devinit int cciss_hard_reset_controller(struct pci_dev
*pdev)
+{
+       u16 pmcsr, saved_config_space[32];
+       int i, pos;
+
+       printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+
+       /* This is very nearly the same thing as
+
+          pci_save_state(pci_dev);
+          pci_set_power_state(pci_dev, PCI_D3hot);
+          pci_set_power_state(pci_dev, PCI_D0);
+          pci_restore_state(pci_dev);
+
+          but we can't use these nice canned kernel routines on
+          kexec, because they also check the MSI/MSI-X state in PCI
+          configuration space and do the wrong thing when it is
+          set/cleared.  Also, the pci_save/restore_state functions
+          violate the ordering requirements for restoring the
+          configuration space from theCCISSdocument (see the
+          comment below).  So we roll our own .... */
+
+       for (i = 0; i < 32; i++)
+               pci_read_config_word(pdev, 2*i, &saved_config_space
[i]);
+
+       pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+       if (pos == 0) {
+               printk(KERN_ERR "cciss_reset_controller: PCI PM not
supported\n");
+               return -ENODEV;
+       }
+
+       /* Quoting from the Open CISS Specification: "The Power
+        * Management Control/Status Register (CSR) controls the power
+        * state of the device.  The normal operating state is D0,
+        * CSR=00h.  The software off state is D3, CSR=03h.  To reset
+        * the controller, place the interface device in D3 then to
+        * D0, this causes a secondary PCI reset which will reset the
+        * controller." */
+
+       /* enter the D3hot power management state */
+       pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+       pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+       pmcsr |= PCI_D3hot;
+       pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(HZ >> 1);
+
+       /* enter the D0 power management state */
+       pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+       pmcsr |= PCI_D0;
+       pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(HZ >> 1);
+
+       /* Restore the PCI configuration space.  The Open CISS
+        * Specification says, "Restore the PCI Configuration
+        * Registers, offsets 00h through 60h. It is important to
+        * restore the command register, 16-bits at offset 04h,
+        * last. Do not restore the configuration status register,
+        * 16-bits at offset 06h."  Note that the offset is 2*i. */
+       for (i = 0; i < 32; i++) {
+               if (i == 2 || i == 3)
+                       continue;
+               pci_write_config_word(pdev, 2*i, saved_config_space
[i]);
+       }
+       wmb();
+       pci_write_config_word(pdev, 4, saved_config_space[2]);
+
+       return 0;
+}
+
 /*
  *  This is it.  Find all the controllers and register them.  I
really hate
  *  stealing all these major device numbers.
@@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct
pci_dev *pdev,
        int dac, return_code;
        InquiryData_struct *inq_buff = NULL;

+       if (reset_devices) {
+               /* Reset the controller with a PCI power-cycle */
+               if (cciss_hard_reset_controller(pdev) ||
cciss_reset_msi(pdev))
+                       return -ENODEV;
+
+               /* Some devices (notably the HP Smart Array 5i
Controller)
+                  need a little pause here */
+               schedule_timeout_uninterruptible(30*HZ);
+
+               /* Now try to get the controller to respond to a no-op
*/
+               for (i=0; i<12; i++) {
+                       if (cciss_noop(pdev) == 0)
+                               break;
+                       else
+                               printk("cciss: no-op failed%s\n", (i <
11 ? "; re-trying" : ""));
+               }
+       }
+
        i = alloc_cciss_hba();
        if (i < 0)
                return -1;
--
To unsubscribe from this list: send the line "unsubscribe linux-
kernel" in
the body of a message to majord...@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/