[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230801101545.519944-1-przemyslaw.kitszel@intel.com>
Date: Tue, 1 Aug 2023 06:15:45 -0400
From: Przemek Kitszel <przemyslaw.kitszel@...el.com>
To: intel-wired-lan@...ts.osuosl.org,
Tony Nguyen <anthony.l.nguyen@...el.com>
Cc: netdev@...r.kernel.org,
Jacob Keller <jacob.e.keller@...el.com>,
Jesse Brandeburg <jesse.brandeburg@...el.com>,
Przemek Kitszel <przemyslaw.kitszel@...el.com>
Subject: [PATCH] ice: split ice_aq_wait_for_event() func into two
Mitigate race between registering on wait list and receiving
AQ Response from FW.
ice_aq_prep_for_event() should be called before sending AQ command,
ice_aq_wait_for_event() should be called after sending AQ command,
to wait for AQ Response.
struct ice_aq_task is exposed to callers, what takes burden of memory
ownership out from AQ-wait family of functions.
Embed struct ice_rq_event_info event into struct ice_aq_task
(instead of it being a ptr), to remove some more code from the callers.
Additional fix: one of the checks in ice_aq_check_events() was off by one.
Please note, that this was found by reading the code,
an actual race has not yet materialized.
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@...el.com>
---
drivers/net/ethernet/intel/ice/ice.h | 21 +++-
.../net/ethernet/intel/ice/ice_fw_update.c | 45 +++++----
drivers/net/ethernet/intel/ice/ice_main.c | 98 +++++++++----------
3 files changed, 92 insertions(+), 72 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 34be1cb1e28f..3ac645afbc8d 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -931,8 +931,25 @@ void ice_fdir_release_flows(struct ice_hw *hw);
void ice_fdir_replay_flows(struct ice_hw *hw);
void ice_fdir_replay_fltrs(struct ice_pf *pf);
int ice_fdir_create_dflt_rules(struct ice_pf *pf);
-int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
- struct ice_rq_event_info *event);
+
+enum ice_aq_task_state {
+ ICE_AQ_TASK_NOT_PREPARED,
+ ICE_AQ_TASK_WAITING,
+ ICE_AQ_TASK_COMPLETE,
+ ICE_AQ_TASK_CANCELED,
+};
+
+struct ice_aq_task {
+ struct hlist_node entry;
+ struct ice_rq_event_info event;
+ enum ice_aq_task_state state;
+ u16 opcode;
+};
+
+void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+ u16 opcode);
+int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+ unsigned long timeout);
int ice_open(struct net_device *netdev);
int ice_open_internal(struct net_device *netdev);
int ice_stop(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
index 3dc5662d62a6..319a2d6fe26c 100644
--- a/drivers/net/ethernet/intel/ice/ice_fw_update.c
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -293,16 +293,17 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
{
u16 completion_module, completion_retval;
struct device *dev = ice_pf_to_dev(pf);
- struct ice_rq_event_info event;
+ struct ice_aq_task task = {};
struct ice_hw *hw = &pf->hw;
+ struct ice_aq_desc *desc;
u32 completion_offset;
int err;
- memset(&event, 0, sizeof(event));
-
dev_dbg(dev, "Writing block of %u bytes for module 0x%02x at offset %u\n",
block_size, module, offset);
+ ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_write);
+
err = ice_aq_update_nvm(hw, module, offset, block_size, block,
last_cmd, 0, NULL);
if (err) {
@@ -319,19 +320,20 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
* is conservative and is intended to prevent failure to update when
* firmware is slow to respond.
*/
- err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, &event);
+ err = ice_aq_wait_for_event(pf, &task, 15 * HZ);
if (err) {
dev_err(dev, "Timed out while trying to flash module 0x%02x with block of size %u at offset %u, err %d\n",
module, block_size, offset, err);
NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
return -EIO;
}
- completion_module = le16_to_cpu(event.desc.params.nvm.module_typeid);
- completion_retval = le16_to_cpu(event.desc.retval);
+ desc = &task.event.desc;
+ completion_module = le16_to_cpu(desc->params.nvm.module_typeid);
+ completion_retval = le16_to_cpu(desc->retval);
- completion_offset = le16_to_cpu(event.desc.params.nvm.offset_low);
- completion_offset |= event.desc.params.nvm.offset_high << 16;
+ completion_offset = le16_to_cpu(desc->params.nvm.offset_low);
+ completion_offset |= desc->params.nvm.offset_high << 16;
if (completion_module != module) {
dev_err(dev, "Unexpected module_typeid in write completion: got 0x%x, expected 0x%x\n",
@@ -363,8 +365,8 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
*/
if (reset_level && last_cmd && module == ICE_SR_1ST_NVM_BANK_PTR) {
if (hw->dev_caps.common_cap.pcie_reset_avoidance) {
- *reset_level = (event.desc.params.nvm.cmd_flags &
- ICE_AQC_NVM_RESET_LVL_M);
+ *reset_level = desc->params.nvm.cmd_flags &
+ ICE_AQC_NVM_RESET_LVL_M;
dev_dbg(dev, "Firmware reported required reset level as %u\n",
*reset_level);
} else {
@@ -479,39 +481,41 @@ ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component,
{
u16 completion_module, completion_retval;
struct device *dev = ice_pf_to_dev(pf);
- struct ice_rq_event_info event;
+ struct ice_aq_task task = {};
struct ice_hw *hw = &pf->hw;
+ struct ice_aq_desc *desc;
struct devlink *devlink;
int err;
dev_dbg(dev, "Beginning erase of flash component '%s', module 0x%02x\n", component, module);
- memset(&event, 0, sizeof(event));
-
devlink = priv_to_devlink(pf);
devlink_flash_update_timeout_notify(devlink, "Erasing", component, ICE_FW_ERASE_TIMEOUT);
+ ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_erase);
+
err = ice_aq_erase_nvm(hw, module, NULL);
if (err) {
dev_err(dev, "Failed to erase %s (module 0x%02x), err %d aq_err %s\n",
component, module, err,
ice_aq_str(hw->adminq.sq_last_status));
NL_SET_ERR_MSG_MOD(extack, "Failed to erase flash module");
err = -EIO;
goto out_notify_devlink;
}
- err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_erase, ICE_FW_ERASE_TIMEOUT * HZ, &event);
+ err = ice_aq_wait_for_event(pf, &task, ICE_FW_ERASE_TIMEOUT * HZ);
if (err) {
dev_err(dev, "Timed out waiting for firmware to respond with erase completion for %s (module 0x%02x), err %d\n",
component, module, err);
NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
goto out_notify_devlink;
}
- completion_module = le16_to_cpu(event.desc.params.nvm.module_typeid);
- completion_retval = le16_to_cpu(event.desc.retval);
+ desc = &task.event.desc;
+ completion_module = le16_to_cpu(desc->params.nvm.module_typeid);
+ completion_retval = le16_to_cpu(desc->retval);
if (completion_module != module) {
dev_err(dev, "Unexpected module_typeid in erase completion for %s: got 0x%x, expected 0x%x\n",
@@ -560,13 +564,13 @@ ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags,
u8 *emp_reset_available, struct netlink_ext_ack *extack)
{
struct device *dev = ice_pf_to_dev(pf);
- struct ice_rq_event_info event;
+ struct ice_aq_task task = {};
struct ice_hw *hw = &pf->hw;
u16 completion_retval;
u8 response_flags;
int err;
- memset(&event, 0, sizeof(event));
+ ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_write_activate);
err = ice_nvm_write_activate(hw, activate_flags, &response_flags);
if (err) {
@@ -592,16 +596,15 @@ ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags,
}
}
- err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, 30 * HZ,
- &event);
+ err = ice_aq_wait_for_event(pf, &task, 30 * HZ);
if (err) {
dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n",
err);
NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
return err;
}
- completion_retval = le16_to_cpu(event.desc.retval);
+ completion_retval = le16_to_cpu(task.event.desc.retval);
if (completion_retval) {
dev_err(dev, "Firmware failed to switch active flash banks aq_err %s\n",
ice_aq_str((enum ice_aq_err)completion_retval));
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index a73895483e6c..7d2e50968277 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1250,88 +1250,86 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
return status;
}
-enum ice_aq_task_state {
- ICE_AQ_TASK_WAITING = 0,
- ICE_AQ_TASK_COMPLETE,
- ICE_AQ_TASK_CANCELED,
-};
-
-struct ice_aq_task {
- struct hlist_node entry;
+/**
+ * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @task: intermediate helper storage and identifier for waiting
+ * @opcode: the opcode to wait for
+ *
+ * Prepares to wait for a specific AdminQ completion event on the ARQ for
+ * a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
+ *
+ * Calls are separated to allow caller registering for event before sending
+ * the command, which mitigates a race between registering and FW responding.
+ *
+ * To obtain only the descriptor contents, pass an task->event with null
+ * msg_buf. If the complete data buffer is desired, allocate the
+ * task->event.msg_buf with enough space ahead of time.
+ */
+void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+ u16 opcode)
+{
+ INIT_HLIST_NODE(&task->entry);
+ task->opcode = opcode;
+ task->state = ICE_AQ_TASK_WAITING;
- u16 opcode;
- struct ice_rq_event_info *event;
- enum ice_aq_task_state state;
-};
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_add_head(&task->entry, &pf->aq_wait_list);
+ spin_unlock_bh(&pf->aq_wait_lock);
+}
/**
* ice_aq_wait_for_event - Wait for an AdminQ event from firmware
* @pf: pointer to the PF private structure
- * @opcode: the opcode to wait for
+ * @task: ptr prepared by ice_aq_prep_for_event()
* @timeout: how long to wait, in jiffies
- * @event: storage for the event info
*
* Waits for a specific AdminQ completion event on the ARQ for a given PF. The
* current thread will be put to sleep until the specified event occurs or
* until the given timeout is reached.
*
- * To obtain only the descriptor contents, pass an event without an allocated
- * msg_buf. If the complete data buffer is desired, allocate the
- * event->msg_buf with enough space ahead of time.
- *
* Returns: zero on success, or a negative error code on failure.
*/
-int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
- struct ice_rq_event_info *event)
+int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+ unsigned long timeout)
{
+ enum ice_aq_task_state *state = &task->state;
struct device *dev = ice_pf_to_dev(pf);
- struct ice_aq_task *task;
- unsigned long start;
+ unsigned long start = jiffies;
long ret;
int err;
- task = kzalloc(sizeof(*task), GFP_KERNEL);
- if (!task)
- return -ENOMEM;
-
- INIT_HLIST_NODE(&task->entry);
- task->opcode = opcode;
- task->event = event;
- task->state = ICE_AQ_TASK_WAITING;
-
- spin_lock_bh(&pf->aq_wait_lock);
- hlist_add_head(&task->entry, &pf->aq_wait_list);
- spin_unlock_bh(&pf->aq_wait_lock);
-
- start = jiffies;
-
- ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
+ ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
+ *state != ICE_AQ_TASK_WAITING,
timeout);
- switch (task->state) {
+ switch (*state) {
+ case ICE_AQ_TASK_NOT_PREPARED:
+ WARN(1, "call to %s without ice_aq_prep_for_event()", __func__);
+ err = -EINVAL;
+ break;
case ICE_AQ_TASK_WAITING:
err = ret < 0 ? ret : -ETIMEDOUT;
break;
case ICE_AQ_TASK_CANCELED:
err = ret < 0 ? ret : -ECANCELED;
break;
case ICE_AQ_TASK_COMPLETE:
err = ret < 0 ? ret : 0;
break;
default:
- WARN(1, "Unexpected AdminQ wait task state %u", task->state);
+ WARN(1, "Unexpected AdminQ wait task state %u", *state);
err = -EINVAL;
break;
}
dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
jiffies_to_msecs(jiffies - start),
jiffies_to_msecs(timeout),
- opcode);
+ task->opcode);
spin_lock_bh(&pf->aq_wait_lock);
hlist_del(&task->entry);
spin_unlock_bh(&pf->aq_wait_lock);
- kfree(task);
return err;
}
@@ -1362,18 +1360,20 @@ static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
spin_lock_bh(&pf->aq_wait_lock);
hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
- if (task->state || task->opcode != opcode)
+ if (task->state != ICE_AQ_TASK_WAITING)
+ continue;
+ if (task->opcode != opcode)
continue;
- memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
- task->event->msg_len = event->msg_len;
+ memcpy(&task->event.desc, &event->desc, sizeof(event->desc));
+ task->event.msg_len = event->msg_len;
/* Only copy the data buffer if a destination was set */
- if (task->event->msg_buf &&
- task->event->buf_len > event->buf_len) {
- memcpy(task->event->msg_buf, event->msg_buf,
+ if (task->event.msg_buf &&
+ task->event.buf_len >= event->buf_len) {
+ memcpy(task->event.msg_buf, event->msg_buf,
event->buf_len);
- task->event->buf_len = event->buf_len;
+ task->event.buf_len = event->buf_len;
}
task->state = ICE_AQ_TASK_COMPLETE;
--
2.40.1
Powered by blists - more mailing lists