[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230927035734.42816-8-michael.chan@broadcom.com>
Date: Tue, 26 Sep 2023 20:57:32 -0700
From: Michael Chan <michael.chan@...adcom.com>
To: davem@...emloft.net
Cc: netdev@...r.kernel.org,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
gospo@...adcom.com,
Kalesh AP <kalesh-anakkur.purayil@...adcom.com>,
Jean Delvare <jdelvare@...e.com>,
Guenter Roeck <linux@...ck-us.net>,
linux-hwmon@...r.kernel.org
Subject: [PATCH net-next v2 7/9] bnxt_en: Event handler for Thermal event
From: Kalesh AP <kalesh-anakkur.purayil@...adcom.com>
Newer FW will send a new async event when it detects that
the chip's temperature has crossed the configured threshold value.
The driver will now notify hwmon and will log a warning message.
Link: https://lore.kernel.org/netdev/20230815045658.80494-13-michael.chan@broadcom.com/
Cc: Jean Delvare <jdelvare@...e.com>
Cc: Guenter Roeck <linux@...ck-us.net>
Cc: linux-hwmon@...r.kernel.org
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@...adcom.com>
Signed-off-by: Michael Chan <michael.chan@...adcom.com>
---
v2:
Remove hwmon dependencies from bnxt.c.
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 52 +++++++++++++++++++
.../net/ethernet/broadcom/bnxt/bnxt_hwmon.c | 25 +++++++++
.../net/ethernet/broadcom/bnxt/bnxt_hwmon.h | 5 ++
3 files changed, 82 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b83f8de0a015..7104237272de 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2129,6 +2129,24 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
return INVALID_HW_RING_ID;
}
+#define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2) \
+ ((data2) & \
+ ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK)
+
+#define BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2) \
+ (((data2) & \
+ ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK) >>\
+ ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT)
+
+#define EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1) \
+ ((data1) & \
+ ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK)
+
+#define EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1) \
+ (((data1) & \
+ ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR) ==\
+ ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING)
+
static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
{
u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1);
@@ -2144,6 +2162,40 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD:
netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n");
break;
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD: {
+ u32 type = EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1);
+ char *threshold_type;
+ char *dir_str;
+
+ switch (type) {
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
+ threshold_type = "warning";
+ break;
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
+ threshold_type = "critical";
+ break;
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
+ threshold_type = "fatal";
+ break;
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
+ threshold_type = "shutdown";
+ break;
+ default:
+ netdev_err(bp->dev, "Unknown Thermal threshold type event\n");
+ return;
+ }
+ if (EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1))
+ dir_str = "above";
+ else
+ dir_str = "below";
+ netdev_warn(bp->dev, "Chip temperature has gone %s the %s thermal threshold!\n",
+ dir_str, threshold_type);
+ netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n",
+ BNXT_EVENT_THERMAL_CURRENT_TEMP(data2),
+ BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2));
+ bnxt_hwmon_notify_event(bp, type);
+ break;
+ }
default:
netdev_err(bp->dev, "FW reported unknown error type %u\n",
err_type);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c
index 6d36158df26e..e48094043c3b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c
@@ -18,6 +18,31 @@
#include "bnxt_hwrm.h"
#include "bnxt_hwmon.h"
+void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
+{
+ u32 attr;
+
+ if (!bp->hwmon_dev)
+ return;
+
+ switch (type) {
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
+ attr = hwmon_temp_max_alarm;
+ break;
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
+ attr = hwmon_temp_crit_alarm;
+ break;
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
+ case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
+ attr = hwmon_temp_emergency_alarm;
+ break;
+ default:
+ return;
+ }
+
+ hwmon_notify_event(&bp->pdev->dev, hwmon_temp, attr, 0);
+}
+
static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp)
{
struct hwrm_temp_monitor_query_output *resp;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h
index af310066687c..76d9f599ebc0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h
@@ -11,9 +11,14 @@
#define BNXT_HWMON_H
#ifdef CONFIG_BNXT_HWMON
+void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type);
void bnxt_hwmon_uninit(struct bnxt *bp);
void bnxt_hwmon_init(struct bnxt *bp);
#else
+static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
+{
+}
+
static inline void bnxt_hwmon_uninit(struct bnxt *bp)
{
}
--
2.30.1
Download attachment "smime.p7s" of type "application/pkcs7-signature" (4209 bytes)
Powered by blists - more mailing lists