[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20240701112139.GX17134@kernel.org>
Date: Mon, 1 Jul 2024 12:21:39 +0100
From: Simon Horman <horms@...nel.org>
To: Rushil Gupta <rushilg@...gle.com>
Cc: netdev@...r.kernel.org, jeroendb@...gle.com, pkaligineedi@...gle.com,
davem@...emloft.net, kuba@...nel.org, edumazet@...gle.com,
pabeni@...hat.com, willemb@...gle.com, hramamurthy@...gle.com,
Shailend Chand <shailend@...gle.com>,
Ziwei Xiao <ziweixiao@...gle.com>
Subject: Re: [PATCH] gve: Add retry logic for recoverable adminq errors
On Fri, Jun 28, 2024 at 06:54:46PM +0000, Rushil Gupta wrote:
> From: Jeroen de Borst <jeroendb@...gle.com>
>
> An adminq command is retried if it fails with an ETIME error code
> which translates to the deadline exceeded error for the device.
> The create and destroy adminq commands are now managed via a common
> method. This method keeps track of return codes for each queue and retries
> the commands for the queues that failed with ETIME.
> Other adminq commands that do not require queue level granularity are
> simply retried in gve_adminq_execute_cmd.
>
> Signed-off-by: Rushil Gupta <rushilg@...gle.com>
> Signed-off-by: Jeroen de Borst <jeroendb@...gle.com>
> Reviewed-by: Shailend Chand <shailend@...gle.com>
> Reviewed-by: Ziwei Xiao <ziweixiao@...gle.com>
> ---
> drivers/net/ethernet/google/gve/gve_adminq.c | 197 ++++++++++++-------
> drivers/net/ethernet/google/gve/gve_adminq.h | 5 +
> 2 files changed, 129 insertions(+), 73 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
Hi Rushil,
Some minor feedback from my side.
...
> +static int gve_adminq_manage_queues(struct gve_priv *priv,
> + gve_adminq_queue_cmd *cmd,
> + u32 start_id, u32 num_queues)
> +{
> + u32 cmd_idx, queue_idx, ret_code_idx;
> + int queue_done = -1;
> + int *queues_waiting;
> + int retry_cnt = 0;
> + int retry_needed;
> + int *ret_codes;
> + int *commands;
> + int err;
> + int ret;
> +
> + queues_waiting = kvcalloc(num_queues, sizeof(int), GFP_KERNEL);
> + if (!queues_waiting)
> + return -ENOMEM;
> + ret_codes = kvcalloc(num_queues, sizeof(int), GFP_KERNEL);
> + if (!ret_codes) {
> + err = -ENOMEM;
> + goto free_with_queues_waiting;
> + }
> + commands = kvcalloc(num_queues, sizeof(int), GFP_KERNEL);
> + if (!commands) {
> + err = -ENOMEM;
> + goto free_with_ret_codes;
> + }
> +
> + for (queue_idx = 0; queue_idx < num_queues; queue_idx++)
> + queues_waiting[queue_idx] = start_id + queue_idx;
> + do {
> + retry_needed = 0;
> + queue_idx = 0;
> + while (queue_idx < num_queues) {
> + cmd_idx = 0;
> + while (queue_idx < num_queues) {
> + if (queues_waiting[queue_idx] != queue_done) {
> + err = cmd(priv, queues_waiting[queue_idx]);
It looks like this line, and some others in this patch, could trivially
be wrapped so they are no longer than 80 columns wide. As is still
preferred for networking code.
Flagged by checkpatch.pl --max-line-length=80
> + if (err == -ENOMEM)
> + break;
> + if (err)
> + goto free_with_commands;
> + commands[cmd_idx++] = queue_idx;
> + }
> + queue_idx++;
> + }
> +
> + if (queue_idx < num_queues)
> + dev_dbg(&priv->pdev->dev,
> + "Issued %d of %d batched commands\n",
> + queue_idx, num_queues);
> +
> + err = gve_adminq_kick_and_wait(priv, cmd_idx, ret_codes);
> + if (err)
> + goto free_with_commands;
> +
> + for (ret_code_idx = 0; ret_code_idx < cmd_idx; ret_code_idx++) {
> + if (ret_codes[ret_code_idx] == 0) {
> + queues_waiting[commands[ret_code_idx]] = queue_done;
> + } else if (ret_codes[ret_code_idx] != -ETIME) {
> + ret = ret_codes[ret_code_idx];
> + goto free_with_commands;
> + } else {
> + retry_needed++;
> + }
> + }
> +
> + if (retry_needed)
> + dev_dbg(&priv->pdev->dev,
> + "Issued %d batched commands, %d needed a retry\n",
> + cmd_idx, retry_needed);
> + }
> + } while (retry_needed && ++retry_cnt < GVE_ADMINQ_RETRY_COUNT);
> +
> + ret = retry_needed ? -ETIME : 0;
> +
> +free_with_commands:
> + kvfree(commands);
> + commands = NULL;
> +free_with_ret_codes:
> + kvfree(ret_codes);
> + ret_codes = NULL;
> +free_with_queues_waiting:
> + kvfree(queues_waiting);
> + queues_waiting = NULL;
> + if (err)
> + return err;
I'm unsure if this can occur in practice, because it seems to depend
on the do loop above running , but cmd() not being executed at all.
However, FWIIW, Smatch flags that err may be used uninitialised here.
> + return ret;
> +}
...
Powered by blists - more mailing lists