[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1482475821-18144-2-git-send-email-baolu.lu@linux.intel.com>
Date: Fri, 23 Dec 2016 14:50:21 +0800
From: Lu Baolu <baolu.lu@...ux.intel.com>
To: Mathias Nyman <mathias.nyman@...ux.intel.com>
Cc: linux-usb@...r.kernel.org, linux-kernel@...r.kernel.org,
OGAWA Hirofumi <hirofumi@...l.parknet.co.jp>,
<stable@...r.kernel.org>, Lu Baolu <baolu.lu@...ux.intel.com>
Subject: [PATCH 1/1] xhci: Fix race related to abort operation
From: OGAWA Hirofumi <hirofumi@...l.parknet.co.jp>
Current abort operation has race.
xhci_handle_command_timeout()
xhci_abort_cmd_ring()
xhci_write_64(CMD_RING_ABORT)
xhci_handshake(5s)
do {
check CMD_RING_RUNNING
udelay(1)
...
COMP_CMD_ABORT event
COMP_CMD_STOP event
xhci_handle_stopped_cmd_ring()
restart cmd_ring
CMD_RING_RUNNING become 1 again
} while ()
return -ETIMEDOUT
xhci_write_64(CMD_RING_ABORT)
/* can abort random command */
To do abort operation correctly, we have to wait both of COMP_CMD_STOP
event and negation of CMD_RING_RUNNING.
But like above, while timeout handler is waiting negation of
CMD_RING_RUNNING, event handler can restart cmd_ring. So timeout
handler never be notice negation of CMD_RING_RUNNING, and retry of
CMD_RING_ABORT can abort random command (BTW, I guess retry of
CMD_RING_ABORT was workaround of this race).
To fix this race, this moves xhci_handle_stopped_cmd_ring() to
xhci_abort_cmd_ring(). And timeout handler waits COMP_CMD_STOP event.
At this point, timeout handler is owner of cmd_ring, and safely
restart cmd_ring by using xhci_handle_stopped_cmd_ring().
[FWIW, as bonus, this way would be easily extend to add CMD_RING_PAUSE
operation]
[baolu: release xhci->lock before wait and apply it back again after
wait. This is the reason my signed-off-by is added.]
CC: <stable@...r.kernel.org>
Signed-off-by: OGAWA Hirofumi <hirofumi@...l.parknet.co.jp>
Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@...ux.intel.com>
---
drivers/usb/host/xhci-mem.c | 1 +
drivers/usb/host/xhci-ring.c | 171 ++++++++++++++++++++++---------------------
drivers/usb/host/xhci.h | 1 +
3 files changed, 91 insertions(+), 82 deletions(-)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 536f572..70c9204 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -2379,6 +2379,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
/* init command timeout work */
INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout);
+ init_completion(&xhci->cmd_ring_stop_completion);
page_size = readl(&xhci->op_regs->page_size);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 63af013..d5fde9a 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -265,23 +265,71 @@ static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay)
return mod_delayed_work(system_wq, &xhci->cmd_timer, delay);
}
-static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
+static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci)
{
+ return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command,
+ cmd_list);
+}
+
+/*
+ * Turn all commands on command ring with status set to "aborted" to no-op trbs.
+ * If there are other commands waiting then restart the ring and kick the timer.
+ * This must be called with command ring stopped and xhci->lock held.
+ */
+static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
+ struct xhci_command *cur_cmd)
+{
+ struct xhci_command *i_cmd;
+ u32 cycle_state;
+
+ /* Turn all aborted commands in list to no-ops, then restart */
+ list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) {
+
+ if (i_cmd->status != COMP_CMD_ABORT)
+ continue;
+
+ i_cmd->status = COMP_CMD_STOP;
+
+ xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
+ i_cmd->command_trb);
+ /* get cycle state from the original cmd trb */
+ cycle_state = le32_to_cpu(
+ i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
+ /* modify the command trb to no-op command */
+ i_cmd->command_trb->generic.field[0] = 0;
+ i_cmd->command_trb->generic.field[1] = 0;
+ i_cmd->command_trb->generic.field[2] = 0;
+ i_cmd->command_trb->generic.field[3] = cpu_to_le32(
+ TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
+
+ /*
+ * caller waiting for completion is called when command
+ * completion event is received for these no-op commands
+ */
+ }
+
+ xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
+
+ /* ring command ring doorbell to restart the command ring */
+ if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
+ !(xhci->xhc_state & XHCI_STATE_DYING)) {
+ xhci->current_cmd = cur_cmd;
+ xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
+ xhci_ring_cmd_db(xhci);
+ }
+}
+
+static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long *flags)
+{
+ unsigned long timeleft;
u64 temp_64;
int ret;
xhci_dbg(xhci, "Abort command ring\n");
- temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
- xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
+ reinit_completion(&xhci->cmd_ring_stop_completion);
- /*
- * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
- * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
- * but the completion event in never sent. Use the cmd timeout timer to
- * handle those cases. Use twice the time to cover the bit polling retry
- */
- xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT);
+ temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
&xhci->op_regs->cmd_ring);
@@ -301,18 +349,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
udelay(1000);
ret = xhci_handshake(&xhci->op_regs->cmd_ring,
CMD_RING_RUNNING, 0, 3 * 1000 * 1000);
- if (ret == 0)
- return 0;
-
- xhci_err(xhci, "Stopped the command ring failed, "
- "maybe the host is dead\n");
- cancel_delayed_work(&xhci->cmd_timer);
- xhci->xhc_state |= XHCI_STATE_DYING;
- xhci_quiesce(xhci);
- xhci_halt(xhci);
- return -ESHUTDOWN;
+ if (ret < 0) {
+ xhci_err(xhci, "Stopped the command ring failed, "
+ "maybe the host is dead\n");
+ xhci->xhc_state |= XHCI_STATE_DYING;
+ xhci_halt(xhci);
+ return -ESHUTDOWN;
+ }
+ }
+ /*
+ * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
+ * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
+ * but the completion event in never sent. Wait 2 secs (arbitrary
+ * number) to handle those cases after negation of CMD_RING_RUNNING.
+ */
+ spin_unlock_irqrestore(&xhci->lock, *flags);
+ timeleft = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion,
+ 2 * HZ);
+ spin_lock_irqsave(&xhci->lock, *flags);
+ if (!timeleft) {
+ xhci_dbg(xhci, "No stop event for abort, ring start fail?\n");
+ xhci_cleanup_command_queue(xhci);
+ } else {
+ xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci));
}
-
return 0;
}
@@ -1216,81 +1276,28 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT);
}
-/*
- * Turn all commands on command ring with status set to "aborted" to no-op trbs.
- * If there are other commands waiting then restart the ring and kick the timer.
- * This must be called with command ring stopped and xhci->lock held.
- */
-static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
- struct xhci_command *cur_cmd)
-{
- struct xhci_command *i_cmd, *tmp_cmd;
- u32 cycle_state;
-
- /* Turn all aborted commands in list to no-ops, then restart */
- list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list,
- cmd_list) {
-
- if (i_cmd->status != COMP_CMD_ABORT)
- continue;
-
- i_cmd->status = COMP_CMD_STOP;
-
- xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
- i_cmd->command_trb);
- /* get cycle state from the original cmd trb */
- cycle_state = le32_to_cpu(
- i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
- /* modify the command trb to no-op command */
- i_cmd->command_trb->generic.field[0] = 0;
- i_cmd->command_trb->generic.field[1] = 0;
- i_cmd->command_trb->generic.field[2] = 0;
- i_cmd->command_trb->generic.field[3] = cpu_to_le32(
- TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
-
- /*
- * caller waiting for completion is called when command
- * completion event is received for these no-op commands
- */
- }
-
- xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
-
- /* ring command ring doorbell to restart the command ring */
- if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
- !(xhci->xhc_state & XHCI_STATE_DYING)) {
- xhci->current_cmd = cur_cmd;
- xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
- xhci_ring_cmd_db(xhci);
- }
- return;
-}
-
-
void xhci_handle_command_timeout(struct work_struct *work)
{
struct xhci_hcd *xhci;
int ret;
unsigned long flags;
u64 hw_ring_state;
- bool second_timeout = false;
xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer);
/* mark this command to be cancelled */
spin_lock_irqsave(&xhci->lock, flags);
- if (xhci->current_cmd) {
- if (xhci->current_cmd->status == COMP_CMD_ABORT)
- second_timeout = true;
+ if (xhci->current_cmd)
xhci->current_cmd->status = COMP_CMD_ABORT;
- }
/* Make sure command ring is running before aborting it */
hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
(hw_ring_state & CMD_RING_RUNNING)) {
+ /* Prevent new doorbell, and start command abort */
+ xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
xhci_dbg(xhci, "Command timeout\n");
- ret = xhci_abort_cmd_ring(xhci);
+ ret = xhci_abort_cmd_ring(xhci, &flags);
if (unlikely(ret == -ESHUTDOWN)) {
xhci_err(xhci, "Abort command ring failed\n");
xhci_cleanup_command_queue(xhci);
@@ -1304,9 +1311,9 @@ void xhci_handle_command_timeout(struct work_struct *work)
goto time_out_completed;
}
- /* command ring failed to restart, or host removed. Bail out */
- if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
- xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
+ /* host removed. Bail out */
+ if (xhci->xhc_state & XHCI_STATE_REMOVING) {
+ xhci_dbg(xhci, "host removed, ring start fail?\n");
xhci_cleanup_command_queue(xhci);
goto time_out_completed;
@@ -1357,7 +1364,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
/* If CMD ring stopped we own the trbs between enqueue and dequeue */
if (cmd_comp_code == COMP_CMD_STOP) {
- xhci_handle_stopped_cmd_ring(xhci, cmd);
+ complete_all(&xhci->cmd_ring_stop_completion);
return;
}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index da5bfd6..c525722 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1572,6 +1572,7 @@ struct xhci_hcd {
struct list_head cmd_list;
unsigned int cmd_ring_reserved_trbs;
struct delayed_work cmd_timer;
+ struct completion cmd_ring_stop_completion;
struct xhci_command *current_cmd;
struct xhci_ring *event_ring;
struct xhci_erst erst;
--
2.1.4
Powered by blists - more mailing lists