[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230308051310.12544-4-shannon.nelson@amd.com>
Date: Tue, 7 Mar 2023 21:13:00 -0800
From: Shannon Nelson <shannon.nelson@....com>
To: <shannon.nelson@....com>, <brett.creeley@....com>,
<davem@...emloft.net>, <netdev@...r.kernel.org>, <kuba@...nel.org>
CC: <drivers@...sando.io>, <leon@...nel.org>, <jiri@...nulli.us>
Subject: [PATCH RFC v4 net-next 03/13] pds_core: health timer and workqueue
Add in the periodic health check and the related workqueue,
as well as the handlers for when a FW reset is seen.
Signed-off-by: Shannon Nelson <shannon.nelson@....com>
---
drivers/net/ethernet/amd/pds_core/core.c | 60 ++++++++++++++++++++++++
drivers/net/ethernet/amd/pds_core/dev.c | 3 ++
drivers/net/ethernet/amd/pds_core/main.c | 51 ++++++++++++++++++++
include/linux/pds/pds_core.h | 10 ++++
4 files changed, 124 insertions(+)
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 88a6aa42cc28..87717bf40e80 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -34,3 +34,63 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
set_bit(PDSC_S_FW_DEAD, &pdsc->state);
}
+
+static void pdsc_fw_down(struct pdsc *pdsc)
+{
+ mutex_lock(&pdsc->config_lock);
+
+ if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: already happening\n", __func__);
+ mutex_unlock(&pdsc->config_lock);
+ return;
+ }
+
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+
+ mutex_unlock(&pdsc->config_lock);
+}
+
+static void pdsc_fw_up(struct pdsc *pdsc)
+{
+ int err;
+
+ mutex_lock(&pdsc->config_lock);
+
+ if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
+ mutex_unlock(&pdsc->config_lock);
+ return;
+ }
+
+ err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
+ if (err)
+ goto err_out;
+
+ mutex_unlock(&pdsc->config_lock);
+
+ return;
+
+err_out:
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+ mutex_unlock(&pdsc->config_lock);
+}
+
+void pdsc_health_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
+ bool healthy;
+
+ healthy = pdsc_is_fw_good(pdsc);
+ dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
+ __func__, healthy, pdsc->fw_status, pdsc->last_hb);
+
+ if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ if (healthy)
+ pdsc_fw_up(pdsc);
+ } else {
+ if (!healthy)
+ pdsc_fw_down(pdsc);
+ }
+
+ pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+}
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index 7a9db1505c1f..43229c149b2f 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -177,6 +177,9 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
err = pdsc_devcmd_wait(pdsc, max_seconds);
memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
+ if (err == -ENXIO || err == -ETIMEDOUT)
+ pdsc_queue_health_check(pdsc);
+
return err;
}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 0c63bbe8b417..ac1b30f78e0e 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -25,6 +25,31 @@ static const struct pci_device_id pdsc_id_table[] = {
};
MODULE_DEVICE_TABLE(pci, pdsc_id_table);
+void pdsc_queue_health_check(struct pdsc *pdsc)
+{
+ unsigned long mask;
+
+ /* Don't do a check when in a transition state */
+ mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
+ BIT_ULL(PDSC_S_STOPPING_DRIVER);
+ if (pdsc->state & mask)
+ return;
+
+ /* Queue a new health check if one isn't already queued */
+ queue_work(pdsc->wq, &pdsc->health_work);
+}
+
+static void pdsc_wdtimer_cb(struct timer_list *t)
+{
+ struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
+
+ dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies);
+ mod_timer(&pdsc->wdtimer,
+ round_jiffies(jiffies + pdsc->wdtimer_period));
+
+ pdsc_queue_health_check(pdsc);
+}
+
static void pdsc_unmap_bars(struct pdsc *pdsc)
{
struct pdsc_dev_bar *bars = pdsc->bars;
@@ -135,8 +160,11 @@ static int pdsc_init_vf(struct pdsc *pdsc)
return -1;
}
+#define PDSC_WQ_NAME_LEN 24
+
static int pdsc_init_pf(struct pdsc *pdsc)
{
+ char wq_name[PDSC_WQ_NAME_LEN];
int err;
pcie_print_link_status(pdsc->pdev);
@@ -151,6 +179,13 @@ static int pdsc_init_pf(struct pdsc *pdsc)
if (err)
goto err_out_release_regions;
+ /* General workqueue and timer, but don't start timer yet */
+ snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->id);
+ pdsc->wq = create_singlethread_workqueue(wq_name);
+ INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+ pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
+
mutex_init(&pdsc->devcmd_lock);
mutex_init(&pdsc->config_lock);
@@ -167,11 +202,20 @@ static int pdsc_init_pf(struct pdsc *pdsc)
mutex_unlock(&pdsc->config_lock);
+ /* Lastly, start the health check timer */
+ mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
+
return 0;
err_out_teardown:
pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
err_out_unmap_bars:
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq) {
+ flush_workqueue(pdsc->wq);
+ destroy_workqueue(pdsc->wq);
+ pdsc->wq = NULL;
+ }
mutex_unlock(&pdsc->config_lock);
mutex_destroy(&pdsc->config_lock);
mutex_destroy(&pdsc->devcmd_lock);
@@ -262,6 +306,13 @@ static void pdsc_remove(struct pci_dev *pdev)
mutex_lock(&pdsc->config_lock);
set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq) {
+ flush_workqueue(pdsc->wq);
+ destroy_workqueue(pdsc->wq);
+ pdsc->wq = NULL;
+ }
+
pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
mutex_unlock(&pdsc->config_lock);
mutex_destroy(&pdsc->config_lock);
diff --git a/include/linux/pds/pds_core.h b/include/linux/pds/pds_core.h
index da0663925018..e73af422fae0 100644
--- a/include/linux/pds/pds_core.h
+++ b/include/linux/pds/pds_core.h
@@ -12,6 +12,8 @@
#include <linux/pds/pds_intr.h>
#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver"
+
+#define PDSC_WATCHDOG_SECS 5
#define PDSC_TEARDOWN_RECOVERY false
#define PDSC_TEARDOWN_REMOVING true
#define PDSC_SETUP_RECOVERY false
@@ -63,12 +65,17 @@ struct pdsc {
u8 fw_generation;
unsigned long last_fw_time;
u32 last_hb;
+ struct timer_list wdtimer;
+ unsigned int wdtimer_period;
+ struct work_struct health_work;
struct pdsc_devinfo dev_info;
struct pds_core_dev_identity dev_ident;
unsigned int nintrs;
struct pdsc_intr_info *intr_info; /* array of nintrs elements */
+ struct workqueue_struct *wq;
+
unsigned int devcmd_timeout;
struct mutex devcmd_lock; /* lock for dev_cmd operations */
struct mutex config_lock; /* lock for configuration operations */
@@ -81,6 +88,8 @@ struct pdsc {
u64 __iomem *kern_dbpage;
};
+void pdsc_queue_health_check(struct pdsc *pdsc);
+
struct pdsc *pdsc_dl_alloc(struct device *dev, bool is_pf);
void pdsc_dl_free(struct pdsc *pdsc);
int pdsc_dl_register(struct pdsc *pdsc);
@@ -116,5 +125,6 @@ int pdsc_dev_init(struct pdsc *pdsc);
int pdsc_setup(struct pdsc *pdsc, bool init);
void pdsc_teardown(struct pdsc *pdsc, bool removing);
+void pdsc_health_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
--
2.17.1
Powered by blists - more mailing lists