[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251107111317.69be45a5.michal.pecio@gmail.com>
Date: Fri, 7 Nov 2025 11:13:17 +0100
From: Michal Pecio <michal.pecio@...il.com>
To: Mathias Nyman <mathias.nyman@...el.com>, Greg Kroah-Hartman
<gregkh@...uxfoundation.org>
Cc: linux-usb@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [PATCH] usb: xhci: Assume that endpoints halt as specified
xHCI 4.8.3 recommends that software should simply assume endpoints to
halt after certain events, without looking at the Endpoint Context for
confirmation, because HCs may be slow to update that.
While no cases of such "slowness" appear to be known, different problem
exists on AMD Promontory chipsets: they may halt and generate a transfer
event, but fail to ever update the Endpoint Context at all, at least not
until some command is queued and fails with Context State Error. This is
easily triggered by disconnecting D- of a full speed serial device.
Possibly similar bug in non-AMD hardware has been reported to linux-usb.
In such case, failed TD is given back without erasing from the ring and
endpoint isn't reset. If some URB is unlinked later, Stop Endpoint fails
and its handler resets the endpoint. On next submission it will restart
on the stale TD. Outcome is UAF on success, or another halt on error and
then Dequeue doesn't move and URBs are stuck. Unlinking and resubmitting
the URBs causes unlimited ring expansion if the situation repeats.
This can be solved by ignoring Endpoint Context State and trusting that
endpoints halt when required, except one known case in ancient hardware.
The check for "Already resolving halted ep" becomes redundant, because
for these completion codes we now jump to xhci_handle_halted_endpoint()
which deals with pending EP_HALTED internally.
Link: https://lore.kernel.org/linux-usb/20250311234139.0e73e138@foxbook/
Link: https://lore.kernel.org/linux-usb/20250918055527.4157212-1-zhangjinpeng@kylinos.cn/
Signed-off-by: Michal Pecio <michal.pecio@...il.com>
---
drivers/usb/host/xhci-ring.c | 73 ++++++++++++------------------------
1 file changed, 23 insertions(+), 50 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index fc0043ca85a4..a038464bd6ac 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2185,24 +2185,31 @@ static void xhci_clear_hub_tt_buffer(struct xhci_hcd *xhci, struct xhci_td *td,
* External device side is also halted in functional stall cases. Class driver
* will clear the device halt with a CLEAR_FEATURE(ENDPOINT_HALT) request later.
*/
-static bool xhci_halted_host_endpoint(struct xhci_ep_ctx *ep_ctx, unsigned int comp_code)
+static bool xhci_halted_host_endpoint(struct xhci_hcd *xhci, struct xhci_ep_ctx *ep_ctx,
+ unsigned int comp_code)
{
- /* Stall halts both internal and device side endpoint */
- if (comp_code == COMP_STALL_ERROR)
- return true;
+ int ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2));
- /* TRB completion codes that may require internal halt cleanup */
- if (comp_code == COMP_USB_TRANSACTION_ERROR ||
- comp_code == COMP_BABBLE_DETECTED_ERROR ||
- comp_code == COMP_SPLIT_TRANSACTION_ERROR)
+ switch (comp_code) {
+ case COMP_STALL_ERROR:
+ /* on xHCI this always halts, including protocol stall */
+ return true;
+ case COMP_BABBLE_DETECTED_ERROR:
/*
* The 0.95 spec says a babbling control endpoint is not halted.
* The 0.96 spec says it is. Some HW claims to be 0.95
* compliant, but it halts the control endpoint anyway.
* Check endpoint context if endpoint is halted.
*/
- if (GET_EP_CTX_STATE(ep_ctx) == EP_STATE_HALTED)
- return true;
+ if (xhci->hci_version <= 0x95 && ep_type == CTRL_EP)
+ return GET_EP_CTX_STATE(ep_ctx) == EP_STATE_HALTED;
+
+ fallthrough;
+ case COMP_USB_TRANSACTION_ERROR:
+ case COMP_SPLIT_TRANSACTION_ERROR:
+ /* these errors halt all non-isochronous endpoints */
+ return ep_type != ISOC_IN_EP && ep_type != ISOC_OUT_EP;
+ }
return false;
}
@@ -2239,41 +2246,9 @@ static void finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
* the ring dequeue pointer or take this TD off any lists yet.
*/
return;
- case COMP_USB_TRANSACTION_ERROR:
- case COMP_BABBLE_DETECTED_ERROR:
- case COMP_SPLIT_TRANSACTION_ERROR:
- /*
- * If endpoint context state is not halted we might be
- * racing with a reset endpoint command issued by a unsuccessful
- * stop endpoint completion (context error). In that case the
- * td should be on the cancelled list, and EP_HALTED flag set.
- *
- * Or then it's not halted due to the 0.95 spec stating that a
- * babbling control endpoint should not halt. The 0.96 spec
- * again says it should. Some HW claims to be 0.95 compliant,
- * but it halts the control endpoint anyway.
- */
- if (GET_EP_CTX_STATE(ep_ctx) != EP_STATE_HALTED) {
- /*
- * If EP_HALTED is set and TD is on the cancelled list
- * the TD and dequeue pointer will be handled by reset
- * ep command completion
- */
- if ((ep->ep_state & EP_HALTED) &&
- !list_empty(&td->cancelled_td_list)) {
- xhci_dbg(xhci, "Already resolving halted ep for 0x%llx\n",
- (unsigned long long)xhci_trb_virt_to_dma(
- td->start_seg, td->start_trb));
- return;
- }
- /* endpoint not halted, don't reset it */
- break;
- }
- /* Almost same procedure as for STALL_ERROR below */
- xhci_clear_hub_tt_buffer(xhci, td, ep);
- xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET);
- return;
- case COMP_STALL_ERROR:
+ }
+
+ if (xhci_halted_host_endpoint(xhci, ep_ctx, trb_comp_code)) {
/*
* xhci internal endpoint state will go to a "halt" state for
* any stall, including default control pipe protocol stall.
@@ -2284,14 +2259,12 @@ static void finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
* stall later. Hub TT buffer should only be cleared for FS/LS
* devices behind HS hubs for functional stalls.
*/
- if (ep->ep_index != 0)
+ if (!(ep->ep_index == 0 && trb_comp_code == COMP_STALL_ERROR))
xhci_clear_hub_tt_buffer(xhci, td, ep);
xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET);
return; /* xhci_handle_halted_endpoint marked td cancelled */
- default:
- break;
}
xhci_dequeue_td(xhci, td, ep_ring, td->status);
@@ -2368,7 +2341,7 @@ static void process_ctrl_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
case COMP_STOPPED_LENGTH_INVALID:
goto finish_td;
default:
- if (!xhci_halted_host_endpoint(ep_ctx, trb_comp_code))
+ if (!xhci_halted_host_endpoint(xhci, ep_ctx, trb_comp_code))
break;
xhci_dbg(xhci, "TRB error %u, halted endpoint index = %u\n",
trb_comp_code, ep->ep_index);
@@ -2979,7 +2952,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
return 0;
check_endpoint_halted:
- if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code))
+ if (xhci_halted_host_endpoint(xhci, ep_ctx, trb_comp_code))
xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET);
return 0;
--
2.48.1
Powered by blists - more mailing lists