[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <200802191049.25869.david-b@pacbell.net>
Date: Tue, 19 Feb 2008 10:49:25 -0800
From: David Brownell <david-b@...bell.net>
To: Andre Tomt <andre@...t.net>
Cc: linux-kernel@...r.kernel.org, linux-usb@...r.kernel.org
Subject: Re: USB OOPS 2.6.25-rc2-git1
On Tuesday 19 February 2008, Andre Tomt wrote:
> Got this on a serial console today, using 2.6.25-rc2-git1. Machine was
> not doing anything interesting at the time, but has its / and kernel on
> a usb-storage device (usb pen drive).
Can you try this diagnostic patch, to see if it reports any messages
about IAA and/or IAAD oddities? There's surely a quick workaround
for this, but I'd rather understand the root cause before patching.
- Dave
Work around for an evident bug in one EHCI controller: IAA didn't get
set when IAAD was cleared. Evidently writing the status register can
prevent setting IAA; someone's VHDL (or whatever) code was wrong.
This workaround catches that specific bug (in the IRQ handler and in
the IAA watchdog) and treats it as if IAA was properly set.
The patch also adds *LOTS* of related paranoia, insisting IAAD is clear
(or set, as appropriate) at various points, and adding code to improve
the handling of some such cases. It also raises the volume and precision
of debug messaging related to IAA problems.
This patch is EXPERIMENTAL and DIAGNOSTIC ... not intended for merge.
It's also in *addition* to the IAA watchdog timer rework that's already
been merged into 2.6.25-rc1, which will help some systems.
If you use this, run with CONFIG_USB_DEBUG enabled. Most messages
with IAA or IAAD should then be "interesting" in the sense that they
will indicate something odd happening ... maybe something that's
fully worked around, maybe not.
---
drivers/usb/host/ehci-hcd.c | 38 ++++++++++++++++++++++++++++++--------
drivers/usb/host/ehci-q.c | 27 ++++++++++++++++++++++++++-
2 files changed, 56 insertions(+), 9 deletions(-)
--- g26.orig/drivers/usb/host/ehci-hcd.c 2008-02-11 19:18:39.000000000 -0800
+++ g26/drivers/usb/host/ehci-hcd.c 2008-02-13 15:30:56.000000000 -0800
@@ -255,21 +255,33 @@ static void ehci_iaa_watchdog(unsigned l
u32 status, cmd;
spin_lock_irqsave (&ehci->lock, flags);
- WARN_ON(!ehci->reclaim);
status = ehci_readl(ehci, &ehci->regs->status);
cmd = ehci_readl(ehci, &ehci->regs->command);
- ehci_dbg(ehci, "IAA watchdog: status %x cmd %x\n", status, cmd);
/* lost IAA irqs wedge things badly; seen first with a vt8235 */
if (ehci->reclaim) {
- if (status & STS_IAA) {
- ehci_vdbg (ehci, "lost IAA\n");
+ /* STS_IAA means we got the status, but no IRQ. (Or at
+ * best, the IRQ took until just now to arrive.) Missing
+ * CMD_IAAD means we got the effect, but no status or IRQ.
+ */
+ if ((status & STS_IAA) || !(cmd & CMD_IAAD)) {
COUNT (ehci->stats.lost_iaa);
- ehci_writel(ehci, STS_IAA, &ehci->regs->status);
+ if (status & STS_IAA)
+ ehci_writel(ehci, STS_IAA,
+ &ehci->regs->status);
}
- ehci_writel(ehci, cmd & ~CMD_IAAD, &ehci->regs->command);
+ ehci_dbg(ehci, "IAA watchdog%s: status %x cmd %x\n",
+ ((status & STS_IAA) || !(cmd & CMD_IAAD))
+ ? ", lost IAA" : "",
+ status, cmd);
end_unlink_async(ehci);
+
+ } else if (status & STS_IAA) {
+ ehci_writel(ehci, STS_IAA, &ehci->regs->status);
+ ehci_dbg(ehci, "IAA watchdog%s: status %x cmd %x\n",
+ ", IAA with empty reclaim",
+ status, cmd);
}
spin_unlock_irqrestore(&ehci->lock, flags);
@@ -602,7 +614,7 @@ static int ehci_run (struct usb_hcd *hcd
static irqreturn_t ehci_irq (struct usb_hcd *hcd)
{
struct ehci_hcd *ehci = hcd_to_ehci (hcd);
- u32 status, pcd_status = 0;
+ u32 status, pcd_status = 0, cmd;
int bh;
spin_lock (&ehci->lock);
@@ -623,7 +635,7 @@ static irqreturn_t ehci_irq (struct usb_
/* clear (just) interrupts */
ehci_writel(ehci, status, &ehci->regs->status);
- ehci_readl(ehci, &ehci->regs->command); /* unblock posted write */
+ cmd = ehci_readl(ehci, &ehci->regs->command);
bh = 0;
#ifdef VERBOSE_DEBUG
@@ -642,6 +654,16 @@ static irqreturn_t ehci_irq (struct usb_
bh = 1;
}
+ /* Cope with silicon bug where IAA sometimes isn't set, but
+ * IAAD is cleared ... clearing should be a side effect of
+ * setting IAA. So assume that when we expect IAA but neither
+ * IAA nor IAAD are set, we should act as if IAA was reported.
+ */
+ if (ehci->reclaim && !(status & STS_IAA) && !(cmd & CMD_IAAD)) {
+ ehci_dbg(ehci, "IAAD cleared without IAA\n");
+ status |= STS_IAA;
+ }
+
/* complete the unlinking of some qh [4.15.2.3] */
if (status & STS_IAA) {
COUNT (ehci->stats.reclaim);
--- g26.orig/drivers/usb/host/ehci-q.c 2008-02-11 19:18:39.000000000 -0800
+++ g26/drivers/usb/host/ehci-q.c 2008-02-13 17:44:48.000000000 -0800
@@ -1018,6 +1018,19 @@ static void end_unlink_async (struct ehc
timer_action (ehci, TIMER_ASYNC_OFF);
}
+ /* IAAD being set after STS_IAA indicates a silicon bug. But some
+ * suspend paths force unlinks to complete; this can be fine.
+ */
+ if (HC_IS_RUNNING(ehci_to_hcd(ehci)->state)) {
+ u32 cmd = ehci_readl(ehci, &ehci->regs->command);
+
+ if (cmd & CMD_IAAD) {
+ ehci_dbg(ehci, "IAAD wasn't clear %08x\n", cmd);
+ cmd &= ~CMD_IAAD;
+ ehci_writel(ehci, cmd, &ehci->regs->command);
+ }
+ }
+
if (next) {
ehci->reclaim = NULL;
start_unlink_async (ehci, next);
@@ -1041,6 +1054,13 @@ static void start_unlink_async (struct e
BUG ();
#endif
+ if (cmd & CMD_IAAD) {
+ ehci_writel(ehci, cmd & ~CMD_IAAD, &ehci->regs->command);
+ cmd = ehci_readl(ehci, &ehci->regs->command);
+ ehci_err(ehci, "IAAD already set%s\n",
+ (cmd & CMD_IAAD) ? "; clear failed" : "");
+ }
+
/* stop async schedule right now? */
if (unlikely (qh == ehci->async)) {
/* can't get here without STS_ASS set */
@@ -1077,7 +1097,12 @@ static void start_unlink_async (struct e
cmd |= CMD_IAAD;
ehci_writel(ehci, cmd, &ehci->regs->command);
- (void)ehci_readl(ehci, &ehci->regs->command);
+ cmd = ehci_readl(ehci, &ehci->regs->command);
+ if (!(cmd & CMD_IAAD)) {
+ if (!(ehci_readl(ehci, &ehci->regs->status) & STS_IAA))
+ ehci_err(ehci, "write of IAAD didn't take? %08x\n",
+ cmd);
+ }
iaa_watchdog_start(ehci);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists