Message ID | a9b97ecf80c722b42eceac1800f78ba57027af48.1719849427.git.petrm@nvidia.com (mailing list archive) |
---|---|
State | New |
Delegated to: | Daniel Lezcano |
Headers | show |
Series | None | expand |
On 7/1/24 18:41, Petr Machata wrote: > From: Ido Schimmel <idosch@nvidia.com> > > Commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to > thermal_debug_cdev_add()") changed the thermal core to read the current > state of the cooling device as part of the cooling device's > registration. This is incompatible with the current implementation of > the cooling device operations in mlxsw, leading to initialization > failure with errors such as: > > mlxsw_spectrum 0000:01:00.0: Failed to register cooling device > mlxsw_spectrum 0000:01:00.0: cannot register bus device > > The reason for the failure is that when the get current state operation > is invoked the driver tries to derive the index of the cooling device by > walking a per thermal zone array and looking for the matching cooling > device pointer. However, the pointer is returned from the registration > function and therefore only set in the array after the registration. > > The issue was later fixed by commit 1af89dedc8a5 ("thermal: core: Do not > fail cdev registration because of invalid initial state") by not failing > the registration of the cooling device if it cannot report a valid > current state during registration, although drivers are responsible for > ensuring that this will not happen. > > Therefore, make sure the driver is able to report a valid current state > for the cooling device during registration by passing to the > registration function a per cooling device private data that already has > the cooling device index populated. > > Cc: linux-pm@vger.kernel.org > Reviewed-by: Vadim Pasternak <vadimp@nvidia.com> > Signed-off-by: Ido Schimmel <idosch@nvidia.com> > Signed-off-by: Petr Machata <petrm@nvidia.com> > --- > .../ethernet/mellanox/mlxsw/core_thermal.c | 50 ++++++++++--------- > 1 file changed, 26 insertions(+), 24 deletions(-) > just two nitpicks > @@ -824,8 +828,8 @@ int mlxsw_thermal_init(struct mlxsw_core *core, > err_thermal_zone_device_register: > err_thermal_cooling_device_register: > for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) > - if (thermal->cdevs[i]) > - thermal_cooling_device_unregister(thermal->cdevs[i]); > + if (thermal->cdevs[i].cdev) this check is done by thermal_cooling_device_unregister() > + thermal_cooling_device_unregister(thermal->cdevs[i].cdev); > err_reg_write: > err_reg_query: > kfree(thermal); > @@ -848,10 +852,8 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) > } > > for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { > - if (thermal->cdevs[i]) { > - thermal_cooling_device_unregister(thermal->cdevs[i]); > - thermal->cdevs[i] = NULL; > - } > + if (thermal->cdevs[i].cdev) ditto > + thermal_cooling_device_unregister(thermal->cdevs[i].cdev); > } > > kfree(thermal);
On Mon, Jul 1, 2024 at 6:45 PM Petr Machata <petrm@nvidia.com> wrote: > > From: Ido Schimmel <idosch@nvidia.com> > > Commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to > thermal_debug_cdev_add()") changed the thermal core to read the current > state of the cooling device as part of the cooling device's > registration. This is incompatible with the current implementation of > the cooling device operations in mlxsw, leading to initialization > failure with errors such as: > > mlxsw_spectrum 0000:01:00.0: Failed to register cooling device > mlxsw_spectrum 0000:01:00.0: cannot register bus device > > The reason for the failure is that when the get current state operation > is invoked the driver tries to derive the index of the cooling device by > walking a per thermal zone array and looking for the matching cooling > device pointer. However, the pointer is returned from the registration > function and therefore only set in the array after the registration. > > The issue was later fixed by commit 1af89dedc8a5 ("thermal: core: Do not > fail cdev registration because of invalid initial state") by not failing > the registration of the cooling device if it cannot report a valid > current state during registration, although drivers are responsible for > ensuring that this will not happen. > > Therefore, make sure the driver is able to report a valid current state > for the cooling device during registration by passing to the > registration function a per cooling device private data that already has > the cooling device index populated. > > Cc: linux-pm@vger.kernel.org > Reviewed-by: Vadim Pasternak <vadimp@nvidia.com> > Signed-off-by: Ido Schimmel <idosch@nvidia.com> > Signed-off-by: Petr Machata <petrm@nvidia.com> Acked-by: Rafael J. Wysocki <rafael@kernel.org> > --- > .../ethernet/mellanox/mlxsw/core_thermal.c | 50 ++++++++++--------- > 1 file changed, 26 insertions(+), 24 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c > index 5c511e1a8efa..eee3e37983ca 100644 > --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c > +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c > @@ -100,6 +100,12 @@ static const struct mlxsw_cooling_states default_cooling_states[] = { > > struct mlxsw_thermal; > > +struct mlxsw_thermal_cooling_device { > + struct mlxsw_thermal *thermal; > + struct thermal_cooling_device *cdev; > + unsigned int idx; > +}; > + > struct mlxsw_thermal_module { > struct mlxsw_thermal *parent; > struct thermal_zone_device *tzdev; > @@ -123,7 +129,7 @@ struct mlxsw_thermal { > const struct mlxsw_bus_info *bus_info; > struct thermal_zone_device *tzdev; > int polling_delay; > - struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; > + struct mlxsw_thermal_cooling_device cdevs[MLXSW_MFCR_PWMS_MAX]; > struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; > struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS]; > struct mlxsw_thermal_area line_cards[]; > @@ -147,7 +153,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, > int i; > > for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) > - if (thermal->cdevs[i] == cdev) > + if (thermal->cdevs[i].cdev == cdev) > return i; > > /* Allow mlxsw thermal zone binding to an external cooling device */ > @@ -352,17 +358,14 @@ static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev, > unsigned long *p_state) > > { > - struct mlxsw_thermal *thermal = cdev->devdata; > + struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata; > + struct mlxsw_thermal *thermal = mlxsw_cdev->thermal; > struct device *dev = thermal->bus_info->dev; > char mfsc_pl[MLXSW_REG_MFSC_LEN]; > - int err, idx; > u8 duty; > + int err; > > - idx = mlxsw_get_cooling_device_idx(thermal, cdev); > - if (idx < 0) > - return idx; > - > - mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); > + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, 0); > err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl); > if (err) { > dev_err(dev, "Failed to query PWM duty\n"); > @@ -378,22 +381,19 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, > unsigned long state) > > { > - struct mlxsw_thermal *thermal = cdev->devdata; > + struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata; > + struct mlxsw_thermal *thermal = mlxsw_cdev->thermal; > struct device *dev = thermal->bus_info->dev; > char mfsc_pl[MLXSW_REG_MFSC_LEN]; > - int idx; > int err; > > if (state > MLXSW_THERMAL_MAX_STATE) > return -EINVAL; > > - idx = mlxsw_get_cooling_device_idx(thermal, cdev); > - if (idx < 0) > - return idx; > - > /* Normalize the state to the valid speed range. */ > state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state); > - mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); > + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, > + mlxsw_state_to_duty(state)); > err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl); > if (err) { > dev_err(dev, "Failed to write PWM duty\n"); > @@ -753,17 +753,21 @@ int mlxsw_thermal_init(struct mlxsw_core *core, > } > for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { > if (pwm_active & BIT(i)) { > + struct mlxsw_thermal_cooling_device *mlxsw_cdev; > struct thermal_cooling_device *cdev; > > + mlxsw_cdev = &thermal->cdevs[i]; > + mlxsw_cdev->thermal = thermal; > + mlxsw_cdev->idx = i; > cdev = thermal_cooling_device_register("mlxsw_fan", > - thermal, > + mlxsw_cdev, > &mlxsw_cooling_ops); > if (IS_ERR(cdev)) { > err = PTR_ERR(cdev); > dev_err(dev, "Failed to register cooling device\n"); > goto err_thermal_cooling_device_register; > } > - thermal->cdevs[i] = cdev; > + mlxsw_cdev->cdev = cdev; > } > } > > @@ -824,8 +828,8 @@ int mlxsw_thermal_init(struct mlxsw_core *core, > err_thermal_zone_device_register: > err_thermal_cooling_device_register: > for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) > - if (thermal->cdevs[i]) > - thermal_cooling_device_unregister(thermal->cdevs[i]); > + if (thermal->cdevs[i].cdev) > + thermal_cooling_device_unregister(thermal->cdevs[i].cdev); > err_reg_write: > err_reg_query: > kfree(thermal); > @@ -848,10 +852,8 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) > } > > for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { > - if (thermal->cdevs[i]) { > - thermal_cooling_device_unregister(thermal->cdevs[i]); > - thermal->cdevs[i] = NULL; > - } > + if (thermal->cdevs[i].cdev) > + thermal_cooling_device_unregister(thermal->cdevs[i].cdev); > } > > kfree(thermal); > -- > 2.45.0 > >
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 5c511e1a8efa..eee3e37983ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -100,6 +100,12 @@ static const struct mlxsw_cooling_states default_cooling_states[] = { struct mlxsw_thermal; +struct mlxsw_thermal_cooling_device { + struct mlxsw_thermal *thermal; + struct thermal_cooling_device *cdev; + unsigned int idx; +}; + struct mlxsw_thermal_module { struct mlxsw_thermal *parent; struct thermal_zone_device *tzdev; @@ -123,7 +129,7 @@ struct mlxsw_thermal { const struct mlxsw_bus_info *bus_info; struct thermal_zone_device *tzdev; int polling_delay; - struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; + struct mlxsw_thermal_cooling_device cdevs[MLXSW_MFCR_PWMS_MAX]; struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS]; struct mlxsw_thermal_area line_cards[]; @@ -147,7 +153,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, int i; for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) - if (thermal->cdevs[i] == cdev) + if (thermal->cdevs[i].cdev == cdev) return i; /* Allow mlxsw thermal zone binding to an external cooling device */ @@ -352,17 +358,14 @@ static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *p_state) { - struct mlxsw_thermal *thermal = cdev->devdata; + struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata; + struct mlxsw_thermal *thermal = mlxsw_cdev->thermal; struct device *dev = thermal->bus_info->dev; char mfsc_pl[MLXSW_REG_MFSC_LEN]; - int err, idx; u8 duty; + int err; - idx = mlxsw_get_cooling_device_idx(thermal, cdev); - if (idx < 0) - return idx; - - mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, 0); err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { dev_err(dev, "Failed to query PWM duty\n"); @@ -378,22 +381,19 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) { - struct mlxsw_thermal *thermal = cdev->devdata; + struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata; + struct mlxsw_thermal *thermal = mlxsw_cdev->thermal; struct device *dev = thermal->bus_info->dev; char mfsc_pl[MLXSW_REG_MFSC_LEN]; - int idx; int err; if (state > MLXSW_THERMAL_MAX_STATE) return -EINVAL; - idx = mlxsw_get_cooling_device_idx(thermal, cdev); - if (idx < 0) - return idx; - /* Normalize the state to the valid speed range. */ state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state); - mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, + mlxsw_state_to_duty(state)); err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { dev_err(dev, "Failed to write PWM duty\n"); @@ -753,17 +753,21 @@ int mlxsw_thermal_init(struct mlxsw_core *core, } for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { if (pwm_active & BIT(i)) { + struct mlxsw_thermal_cooling_device *mlxsw_cdev; struct thermal_cooling_device *cdev; + mlxsw_cdev = &thermal->cdevs[i]; + mlxsw_cdev->thermal = thermal; + mlxsw_cdev->idx = i; cdev = thermal_cooling_device_register("mlxsw_fan", - thermal, + mlxsw_cdev, &mlxsw_cooling_ops); if (IS_ERR(cdev)) { err = PTR_ERR(cdev); dev_err(dev, "Failed to register cooling device\n"); goto err_thermal_cooling_device_register; } - thermal->cdevs[i] = cdev; + mlxsw_cdev->cdev = cdev; } } @@ -824,8 +828,8 @@ int mlxsw_thermal_init(struct mlxsw_core *core, err_thermal_zone_device_register: err_thermal_cooling_device_register: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) - if (thermal->cdevs[i]) - thermal_cooling_device_unregister(thermal->cdevs[i]); + if (thermal->cdevs[i].cdev) + thermal_cooling_device_unregister(thermal->cdevs[i].cdev); err_reg_write: err_reg_query: kfree(thermal); @@ -848,10 +852,8 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) } for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { - if (thermal->cdevs[i]) { - thermal_cooling_device_unregister(thermal->cdevs[i]); - thermal->cdevs[i] = NULL; - } + if (thermal->cdevs[i].cdev) + thermal_cooling_device_unregister(thermal->cdevs[i].cdev); } kfree(thermal);