Message ID | 20230815045658.80494-13-michael.chan@broadcom.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | bnxt_en: Update for net-next | expand |
On Mon, Aug 14, 2023 at 09:56:58PM -0700, Michael Chan wrote: > From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> > > Newer FW will send a new async event when it detects that > the chip's temperature has crossed the configured threshold value. > The driver will now notify hwmon and will log a warning message. > > Cc: Jean Delvare <jdelvare@suse.com> > Cc: Guenter Roeck <linux@roeck-us.net> > Cc: linux-hwmon@vger.kernel.org > Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> > Signed-off-by: Michael Chan <michael.chan@broadcom.com> > --- > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 44 +++++++++++++++++++ > .../net/ethernet/broadcom/bnxt/bnxt_hwmon.c | 8 ++++ > .../net/ethernet/broadcom/bnxt/bnxt_hwmon.h | 5 +++ > 3 files changed, 57 insertions(+) > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > index 5e97a3d93e87..c8e04c9501ee 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > @@ -2130,6 +2130,17 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id) > return INVALID_HW_RING_ID; > } > > +#define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2) \ > + ((data2) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK) > + > +#define BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2) \ > + (((data2) & \ > + ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK) >> \ > + ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT) > + > +#define EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1) \ > + ((data1) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK) > + > static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) > { > u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1); > @@ -2145,6 +2156,39 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) > case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD: > 
netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n"); > break; > + case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD: { > + char *threshold_type; > + u32 attr; > + > + switch (EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1)) { > + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN: > + attr = hwmon_temp_lcrit_alarm;
As with the previous patch, this is the wrong attribute.
> + threshold_type = "warning"; > + break; > + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL: > + attr = hwmon_temp_crit_alarm; > + threshold_type = "critical"; > + break; > + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL: > + attr = hwmon_temp_emergency_alarm; > + threshold_type = "fatal"; > + break; > + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN: > + attr = hwmon_temp_max_alarm;
Same here. Overall, it seems to me that it would be better to keep hwmon internals out of this file and just pass the threshold type to the hwmon code.
> + threshold_type = "shutdown"; > + break; > + default: > + netdev_err(bp->dev, "Unknown Thermal threshold type event\n"); > + return; > + } > + netdev_warn(bp->dev, "Chip temperature has crossed the %s thermal threshold!\n", > + threshold_type); > + netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n", > + BNXT_EVENT_THERMAL_CURRENT_TEMP(data2), > + BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2)); > + bnxt_hwmon_notify_event(bp, attr); > + break; > + } > default: > netdev_err(bp->dev, "FW reported unknown error type %u\n", > err_type); > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c > index f5affac1169a..483571264276 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c > @@ -18,6 +18,14 @@ > #include "bnxt_hwrm.h" > #include "bnxt_hwmon.h" > > +void bnxt_hwmon_notify_event(struct bnxt *bp, u32 attr) > +{ > + if (!bp->hwmon_dev) > + return; > + > + hwmon_notify_event(&bp->pdev->dev, hwmon_temp, attr, 0); > +} > + > static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp) > { > struct hwrm_temp_monitor_query_output *resp; > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h > index af310066687c..5cf127702764 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h > @@ -11,9 +11,14 @@ > #define BNXT_HWMON_H > > #ifdef CONFIG_BNXT_HWMON > +void bnxt_hwmon_notify_event(struct bnxt *bp, u32 attr); > void bnxt_hwmon_uninit(struct bnxt *bp); > void bnxt_hwmon_init(struct bnxt *bp); > #else > +static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 attr) > +{ > +} > + > static inline void bnxt_hwmon_uninit(struct bnxt *bp) > { > } > -- > 2.30.1 >
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5e97a3d93e87..c8e04c9501ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2130,6 +2130,17 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id) return INVALID_HW_RING_ID; } +#define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2) \ + ((data2) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK) + +#define BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2) \ + (((data2) & \ + ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK) >> \ + ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT) + +#define EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1) \ + ((data1) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK) + static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) { u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1); @@ -2145,6 +2156,39 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD: netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n"); break; + case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD: { + char *threshold_type; + u32 attr; + + switch (EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1)) { + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN: + attr = hwmon_temp_lcrit_alarm; + threshold_type = "warning"; + break; + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL: + attr = hwmon_temp_crit_alarm; + threshold_type = "critical"; + break; + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL: + attr = hwmon_temp_emergency_alarm; + threshold_type = "fatal"; + break; + case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN: + attr = hwmon_temp_max_alarm; + 
threshold_type = "shutdown"; + break; + default: + netdev_err(bp->dev, "Unknown Thermal threshold type event\n"); + return; + } + netdev_warn(bp->dev, "Chip temperature has crossed the %s thermal threshold!\n", + threshold_type); + netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n", + BNXT_EVENT_THERMAL_CURRENT_TEMP(data2), + BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2)); + bnxt_hwmon_notify_event(bp, attr); + break; + } default: netdev_err(bp->dev, "FW reported unknown error type %u\n", err_type); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c index f5affac1169a..483571264276 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c @@ -18,6 +18,14 @@ #include "bnxt_hwrm.h" #include "bnxt_hwmon.h" +void bnxt_hwmon_notify_event(struct bnxt *bp, u32 attr) +{ + if (!bp->hwmon_dev) + return; + + hwmon_notify_event(&bp->pdev->dev, hwmon_temp, attr, 0); +} + static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp) { struct hwrm_temp_monitor_query_output *resp; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h index af310066687c..5cf127702764 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h @@ -11,9 +11,14 @@ #define BNXT_HWMON_H #ifdef CONFIG_BNXT_HWMON +void bnxt_hwmon_notify_event(struct bnxt *bp, u32 attr); void bnxt_hwmon_uninit(struct bnxt *bp); void bnxt_hwmon_init(struct bnxt *bp); #else +static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 attr) +{ +} + static inline void bnxt_hwmon_uninit(struct bnxt *bp) { }