[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250406175032.12b7d284@foxbook>
Date: Sun, 6 Apr 2025 17:50:32 +0200
From: Michał Pecio <michal.pecio@...il.com>
To: Alan Stern <stern@...land.harvard.edu>, Paul Menzel
<pmenzel@...gen.mpg.de>
Cc: Mathias Nyman <mathias.nyman@...ux.intel.com>, Mathias Nyman
<mathias.nyman@...el.com>, Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
linux-usb@...r.kernel.org, LKML <linux-kernel@...r.kernel.org>
Subject: Re: xhci: WARN Set TR Deq Ptr cmd failed due to incorrect slot or
ep state.
On Sun, 6 Apr 2025 09:50:08 +0200, Michał Pecio wrote:
> What I found suspicious is that there is also endpoint_disable() and
> I'm not sure where it comes from. Looking at core code, it seems to
> often be followed by endpoint_reset(), but the log doesn't show that.
There might be something to it. I went through my collection of card
readers and found a reproducible case where endpoint_reset() is called
with NULL host_ep->hcpriv and it bails out without even an xhci_dbg().
Reloading ums-realtek with the reader already connected and no card:
[ 6kwi 17:30] usbcore: deregistering interface driver ums-realtek
[ +0,679251] ums-realtek 14-1:1.0: USB Mass Storage device detected
[ +0,161730] scsi host9: usb-storage 14-1:1.0
[ +0,000193] usbcore: registered new interface driver ums-realtek
[ +1,018198] scsi 9:0:0:0: Direct-Access Generic- Multi-Card 1.00 PQ: 0 ANSI: 0 CCS
[ +0,000241] sd 9:0:0:0: Attached scsi generic sg1 type 0
[ +0,001182] sd 9:0:0:0: [sdb] Media removed, stopped polling
[ +0,000517] sd 9:0:0:0: [sdb] Attached SCSI removable disk
[ +0,000952] usb 14-1: XXX ep 4 is now EP_STALLED
[ +0,000907] xhci_hcd 0000:0a:00.0: xhci_endpoint_reset ep 4 udev 0000000000000000 slot_id -1 vdev ffffffffffffffff
[ +0,000030] usb 14-1: XXX URB ffff88811af85f00 queued before clearing halt
[ +30,400178] xhci_hcd 0000:0a:00.0: xhci_endpoint_reset ep 0 udev 0000000000000000 slot_id -1 vdev ffffffffffffffff
[ +0,112403] usb 14-1: reset high-speed USB device number 3 using xhci_hcd
[ +0,124145] xhci_hcd 0000:0a:00.0: xhci_endpoint_reset ep 0 udev 0000000000000000 slot_id -1 vdev ffffffffffffffff
[ +0,027202] usb 14-1: XXX ep 4 still EP_STALLED on init, clearing
[ +0,000995] xhci_hcd 0000:0a:00.0: xhci_endpoint_reset ep 1 udev 0000000000000000 slot_id -1 vdev ffffffffffffffff
[ +0,000013] xhci_hcd 0000:0a:00.0: xhci_endpoint_reset ep 4 udev 0000000000000000 slot_id -1 vdev ffffffffffffffff
Not sure what's happening, but at least it shows that EP_STALLED can
still be set when the endpoint is re-enabled and my patch clears it.
This bug only came out now because besides clearing this new EP_STALLED
flag, xhci_endpoint_reset() is practically a no-op in most cases.
v2 debug patch below for anyone interested.
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 3f918fea7aea..0ebf7980780f 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1463,6 +1463,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
if (!virt_dev->eps[ep_index].new_ring)
return -ENOMEM;
+ if (virt_dev->eps[ep_index].ep_state & EP_STALLED)
+ dev_err(&udev->dev, "XXX ep %d still EP_STALLED on init, clearing\n", ep_index);
+ virt_dev->eps[ep_index].ep_state &= ~EP_STALLED;
+
virt_dev->eps[ep_index].skip = false;
ep_ring = virt_dev->eps[ep_index].new_ring;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 0c5c0769db74..308b30763dc6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2544,6 +2544,7 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
struct xhci_slot_ctx *slot_ctx;
u32 trb_comp_code;
u32 remaining, requested, ep_trb_len;
+ u32 ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx);
trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
@@ -2584,6 +2585,7 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET);
return;
case COMP_STALL_ERROR:
+ dev_err(&td->urb->dev->dev, "XXX ep %d is now EP_STALLED\n", ep_index);
ep->ep_state |= EP_STALLED;
break;
default:
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 83a4adf57bae..adb6efce5d31 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1606,9 +1606,11 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
}
/* Class driver might not be aware ep halted due to async URB giveback */
- if (*ep_state & EP_STALLED)
- dev_dbg(&urb->dev->dev, "URB %p queued before clearing halt\n",
+ if (*ep_state & EP_STALLED) {
+ dev_err(&urb->dev->dev, "XXX URB %p queued before clearing halt\n",
urb);
+ //dump_stack();
+ }
switch (usb_endpoint_type(&urb->ep->desc)) {
@@ -1772,6 +1774,11 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
/* In these cases no commands are pending but the endpoint is stopped */
if (ep->ep_state & (EP_CLEARING_TT | EP_STALLED)) {
+ struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index);
+ int ctx_state = GET_EP_CTX_STATE(ep_ctx);
+ if (ctx_state != EP_STATE_STOPPED)
+ dev_err(&urb->dev->dev, "XXX WTF ep_state %d ctx_state %d\n", ep->ep_state, ctx_state);
+
/* and cancelled TDs can be given back right away */
xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n",
urb->dev->slot_id, ep_index, ep->ep_state);
@@ -3177,6 +3184,11 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
xhci = hcd_to_xhci(hcd);
ep_index = xhci_get_endpoint_index(&host_ep->desc);
+ udev = (struct usb_device *) host_ep->hcpriv;
+
+ xhci_info(xhci, "%s ep %d udev %px slot_id %d vdev %px\n", __func__, ep_index, udev,
+ udev ? udev->slot_id : -1,
+ udev ? xhci->devs[udev->slot_id] : (void*)~0L);
/*
* Usb core assumes a max packet value for ep0 on FS devices until the
@@ -3201,7 +3213,6 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
if (!host_ep->hcpriv)
return;
- udev = (struct usb_device *) host_ep->hcpriv;
vdev = xhci->devs[udev->slot_id];
if (!udev->slot_id || !vdev)
@@ -3211,6 +3222,8 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
spin_lock_irqsave(&xhci->lock, flags);
+ if (ep->ep_state & EP_STALLED)
+ dev_err(&udev->dev, "XXX ep %d no longer EP_STALLED\n", ep_index);
ep->ep_state &= ~EP_STALLED;
/* Bail out if toggle is already being cleared by a endpoint reset */
Powered by blists - more mailing lists