[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20241220085027.7692-1-jinjian.song@fibocom.com>
Date: Fri, 20 Dec 2024 16:50:27 +0800
From: Jinjian Song <jinjian.song@...ocom.com>
To: ryazanov.s.a@...il.com,
Jinjian Song <jinjian.song@...ocom.com>,
chandrashekar.devegowda@...el.com,
chiranjeevi.rapolu@...ux.intel.com,
haijun.liu@...iatek.com,
ricardo.martinez@...ux.intel.com,
loic.poulain@...aro.org,
johannes@...solutions.net,
davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com
Cc: andrew+netdev@...n.ch,
angelogioacchino.delregno@...labora.com,
corbet@....net,
danielwinkler@...gle.com,
helgaas@...nel.org,
horms@...nel.org,
korneld@...gle.com,
linux-arm-kernel@...ts.infradead.org,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-mediatek@...ts.infradead.org,
matthias.bgg@...il.com,
netdev@...r.kernel.org
Subject: Re: [net v2] net: wwan: t7xx: Fix FSM command timeout issue
From: Sergey Ryazanov <ryazanov.s.a@...il.com>
>> Fixes: d785ed945de6 ("net: wwan: t7xx: PCIe reset rescan")
>
>The completion waiting was introduced in a different commit. I believe,
>the fix tag should be 13e920d93e37 ("net: wwan: t7xx: Add core components")
>
Got it.
[...]
>> if (cmd->flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
>> *cmd->ret = result;
>
>The memory for the result storage is allocated on the stack as well. And
>writing it unconditionally can cause unexpected consequences.
>
Got it.
[...]
>> wait_ret = wait_for_completion_timeout(&done,
>> msecs_to_jiffies(FSM_CMD_TIMEOUT_MS));
>> - if (!wait_ret)
>> + if (!wait_ret) {
>> + cmd->done = NULL;
>
>We cannot access the command memory here, since fsm_finish_command()
>could release it already.
>
Got it.
[...]
>Here we have an ownership transfer problem and a driver author has tried
>to solve it, but as noticed, we are still experiencing issues in case of
>timeout.
>
>The command completion routine should not release the command memory
>unconditionally. Looks like the references counting approach should help
>us here. E.g.
>1. grab a reference before we put a command into the queue
>1.1. grab an extra reference if we are going to wait the completion
>2. release the reference as soon as we are done with the command execution
>3. in case of completion waiting release the reference as soon as we are
>done with waiting due to completion or timeout
>
>Could you try the following patch? Please note, besides the reference
>counter introduction it also moves completion and result storage inside
>the command structure as advised by the completion documentation.
>
Hi Sergey,
Yes, the patch works fine, needs some minor modifications, could we
feedback to the driver author to merge these changes.
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
index 3931c7a13f5a..265c40b29f56 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
@@ -104,14 +104,20 @@ void t7xx_fsm_broadcast_state(struct t7xx_fsm_ctl *ctl, enum md_state state)
fsm_state_notify(ctl->md, state);
}
+static void fsm_release_command(struct kref *ref)
+{
+ struct t7xx_fsm_command *cmd = container_of(ref, typeof(*cmd), refcnt);
+ kfree(cmd);
+}
+
static void fsm_finish_command(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command *cmd, int result)
{
if (cmd->flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
- *cmd->ret = result;
- complete_all(cmd->done);
+ cmd->result = result;
+ complete_all(&cmd->done);
}
- kfree(cmd);
+ kref_put(&cmd->refcnt, fsm_release_command);
}
static void fsm_del_kf_event(struct t7xx_fsm_event *event)
@@ -475,7 +481,6 @@ static int fsm_main_thread(void *data)
int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id, unsigned int flag)
{
- DECLARE_COMPLETION_ONSTACK(done);
struct t7xx_fsm_command *cmd;
unsigned long flags;
int ret;
@@ -487,11 +492,13 @@ int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id
INIT_LIST_HEAD(&cmd->entry);
cmd->cmd_id = cmd_id;
cmd->flag = flag;
+ kref_init(&cmd->refcnt);
if (flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
- cmd->done = &done;
- cmd->ret = &ret;
+ init_completion(&cmd->done);
+ kref_get(&cmd->refcnt);
}
+ kref_get(&cmd->refcnt);
spin_lock_irqsave(&ctl->command_lock, flags);
list_add_tail(&cmd->entry, &ctl->command_queue);
spin_unlock_irqrestore(&ctl->command_lock, flags);
@@ -501,11 +508,11 @@ int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id
if (flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
unsigned long wait_ret;
- wait_ret = wait_for_completion_timeout(&done,
+ wait_ret = wait_for_completion_timeout(&cmd->done,
msecs_to_jiffies(FSM_CMD_TIMEOUT_MS));
- if (!wait_ret)
- return -ETIMEDOUT;
+ ret = wait_ret ? cmd->result : -ETIMEDOUT;
+ kref_put(&cmd->refcnt, fsm_release_command);
return ret;
}
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.h b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
index 7b0a9baf488c..6e0601bb752e 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.h
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
@@ -110,8 +110,9 @@ struct t7xx_fsm_command {
struct list_head entry;
enum t7xx_fsm_cmd_state cmd_id;
unsigned int flag;
- struct completion *done;
- int *ret;
+ struct completion done;
+ int result;
+ struct kref refcnt;
};
struct t7xx_fsm_notifier {
Thanks.
Jinjian,
Best Regards.
Powered by blists - more mailing lists