Message ID | 20240330112409.3402943-3-luzmaximilian@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | Add thermal sensor driver for Surface Aggregator | expand |
Hi, On 3/30/24 12:24 PM, Maximilian Luz wrote: > From: Ivor Wanders <ivor@iwanders.net> > > The thermal subsystem of the Surface Aggregator Module allows us to > query the names of the respective thermal sensors. Forward those to > userspace. > > Signed-off-by: Ivor Wanders <ivor@iwanders.net> > Co-developed-by: Maximilian Luz <luzmaximilian@gmail.com> > Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com> Thanks, patch looks good to me: Reviewed-by: Hans de Goede <hdegoede@redhat.com> Regards, Hans > --- > drivers/hwmon/surface_temp.c | 112 +++++++++++++++++++++++++++++------ > 1 file changed, 95 insertions(+), 17 deletions(-) > > diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c > index 48c3e826713f6..7a2e1f638336c 100644 > --- a/drivers/hwmon/surface_temp.c > +++ b/drivers/hwmon/surface_temp.c > @@ -17,6 +17,27 @@ > > /* -- SAM interface. -------------------------------------------------------- */ > > +/* > + * Available sensors are indicated by a 16-bit bitfield, where a 1 marks the > + * presence of a sensor. So we have at most 16 possible sensors/channels. > + */ > +#define SSAM_TMP_SENSOR_MAX_COUNT 16 > + > +/* > + * All names observed so far are 6 characters long, but there's only > + * zeros after the name, so perhaps they can be longer. This number reflects > + * the maximum zero-padded space observed in the returned buffer. > + */ > +#define SSAM_TMP_SENSOR_NAME_LENGTH 18 > + > +struct ssam_tmp_get_name_rsp { > + __le16 unknown1; > + char unknown2; > + char name[SSAM_TMP_SENSOR_NAME_LENGTH]; > +} __packed; > + > +static_assert(sizeof(struct ssam_tmp_get_name_rsp) == 21); > + > SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, { > .target_category = SSAM_SSH_TC_TMP, > .command_id = 0x04, > @@ -27,6 +48,11 @@ SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, { > .command_id = 0x01, > }); > > +SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_name, struct ssam_tmp_get_name_rsp, { > + .target_category = SSAM_SSH_TC_TMP, > + .command_id = 0x0e, > +}); > + > static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors) > { > __le16 sensors_le; > @@ -54,12 +80,37 @@ static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temp > return 0; > } > > +static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) > +{ > + struct ssam_tmp_get_name_rsp name_rsp; > + int status; > + > + status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); > + if (status) > + return status; > + > + /* > + * This should not fail unless the name in the returned struct is not > + * null-terminated or someone changed something in the struct > + * definitions above, since our buffer and struct have the same > + * capacity by design. So if this fails blow this up with a warning. > + * Since the more likely cause is that the returned string isn't > + * null-terminated, we might have received garbage (as opposed to just > + * an incomplete string), so also fail the function. > + */ > + status = strscpy(buf, name_rsp.name, buf_len); > + WARN_ON(status < 0); > + > + return status < 0 ? status : 0; > +} > + > > /* -- Driver.---------------------------------------------------------------- */ > > struct ssam_temp { > struct ssam_device *sdev; > s16 sensors; > + char names[SSAM_TMP_SENSOR_MAX_COUNT][SSAM_TMP_SENSOR_NAME_LENGTH]; > }; > > static umode_t ssam_temp_hwmon_is_visible(const void *data, > @@ -83,33 +134,47 @@ static int ssam_temp_hwmon_read(struct device *dev, > return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value); > } > > +static int ssam_temp_hwmon_read_string(struct device *dev, > + enum hwmon_sensor_types type, > + u32 attr, int channel, const char **str) > +{ > + const struct ssam_temp *ssam_temp = dev_get_drvdata(dev); > + > + *str = ssam_temp->names[channel]; > + return 0; > +} > + > static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = { > HWMON_CHANNEL_INFO(chip, > HWMON_C_REGISTER_TZ), > - /* We have at most 16 thermal sensor channels. */ > + /* > + * We have at most SSAM_TMP_SENSOR_MAX_COUNT = 16 thermal sensor > + * channels. > + */ > HWMON_CHANNEL_INFO(temp, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT), > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL), > NULL > }; > > static const struct hwmon_ops ssam_temp_hwmon_ops = { > .is_visible = ssam_temp_hwmon_is_visible, > .read = ssam_temp_hwmon_read, > + .read_string = ssam_temp_hwmon_read_string, > }; > > static const struct hwmon_chip_info ssam_temp_hwmon_chip_info = { > @@ -122,6 +187,7 @@ static int ssam_temp_probe(struct ssam_device *sdev) > struct ssam_temp *ssam_temp; > struct device *hwmon_dev; > s16 sensors; > + int channel; > int status; > > status = ssam_tmp_get_available_sensors(sdev, &sensors); > @@ -135,6 +201,18 @@ static int ssam_temp_probe(struct ssam_device *sdev) > ssam_temp->sdev = sdev; > ssam_temp->sensors = sensors; > > + /* Retrieve the name for each available sensor. */ > + for (channel = 0; channel < SSAM_TMP_SENSOR_MAX_COUNT; channel++) { > + if (!(sensors & BIT(channel))) > + continue; > + > + status = ssam_tmp_get_name(sdev, channel + 1, > + ssam_temp->names[channel], > + SSAM_TMP_SENSOR_NAME_LENGTH); > + if (status) > + return status; > + } > + > hwmon_dev = devm_hwmon_device_register_with_info(&sdev->dev, > "surface_thermal", ssam_temp, &ssam_temp_hwmon_chip_info, > NULL);
On Sat, Mar 30, 2024 at 12:24:01PM +0100, Maximilian Luz wrote: > From: Ivor Wanders <ivor@iwanders.net> > > The thermal subsystem of the Surface Aggregator Module allows us to > query the names of the respective thermal sensors. Forward those to > userspace. > > Signed-off-by: Ivor Wanders <ivor@iwanders.net> > Co-developed-by: Maximilian Luz <luzmaximilian@gmail.com> > Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com> > Reviewed-by: Hans de Goede <hdegoede@redhat.com> > --- > drivers/hwmon/surface_temp.c | 112 +++++++++++++++++++++++++++++------ > 1 file changed, 95 insertions(+), 17 deletions(-) > > diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c > index 48c3e826713f6..7a2e1f638336c 100644 > --- a/drivers/hwmon/surface_temp.c > +++ b/drivers/hwmon/surface_temp.c > @@ -17,6 +17,27 @@ > > /* -- SAM interface. -------------------------------------------------------- */ > > +/* > + * Available sensors are indicated by a 16-bit bitfield, where a 1 marks the > + * presence of a sensor. So we have at most 16 possible sensors/channels. > + */ > +#define SSAM_TMP_SENSOR_MAX_COUNT 16 #define<space>DEFINITION<tab>value > + > +/* > + * All names observed so far are 6 characters long, but there's only > + * zeros after the name, so perhaps they can be longer. This number reflects > + * the maximum zero-padded space observed in the returned buffer. > + */ > +#define SSAM_TMP_SENSOR_NAME_LENGTH 18 > + > +struct ssam_tmp_get_name_rsp { > + __le16 unknown1; > + char unknown2; > + char name[SSAM_TMP_SENSOR_NAME_LENGTH]; > +} __packed; > + > +static_assert(sizeof(struct ssam_tmp_get_name_rsp) == 21); > + > SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, { > .target_category = SSAM_SSH_TC_TMP, > .command_id = 0x04, > @@ -27,6 +48,11 @@ SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, { > .command_id = 0x01, > }); > > +SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_name, struct ssam_tmp_get_name_rsp, { > + .target_category = SSAM_SSH_TC_TMP, > + .command_id = 0x0e, > +}); > + > static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors) > { > __le16 sensors_le; > @@ -54,12 +80,37 @@ static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temp > return 0; > } > > +static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) > +{ > + struct ssam_tmp_get_name_rsp name_rsp; > + int status; > + > + status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); > + if (status) > + return status; > + > + /* > + * This should not fail unless the name in the returned struct is not > + * null-terminated or someone changed something in the struct > + * definitions above, since our buffer and struct have the same > + * capacity by design. So if this fails blow this up with a warning. > + * Since the more likely cause is that the returned string isn't > + * null-terminated, we might have received garbage (as opposed to just > + * an incomplete string), so also fail the function. > + */ > + status = strscpy(buf, name_rsp.name, buf_len); > + WARN_ON(status < 0); Not acceptable. From include/asm-generic/bug.h: * Do not use these macros when checking for invalid external inputs * (e.g. invalid system call arguments, or invalid data coming from * network/devices), and on transient conditions like ENOMEM or EAGAIN. * These macros should be used for recoverable kernel issues only. > + > + return status < 0 ? status : 0; > +} > + > > /* -- Driver.---------------------------------------------------------------- */ > > struct ssam_temp { > struct ssam_device *sdev; > s16 sensors; > + char names[SSAM_TMP_SENSOR_MAX_COUNT][SSAM_TMP_SENSOR_NAME_LENGTH]; > }; > > static umode_t ssam_temp_hwmon_is_visible(const void *data, > @@ -83,33 +134,47 @@ static int ssam_temp_hwmon_read(struct device *dev, > return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value); > } > > +static int ssam_temp_hwmon_read_string(struct device *dev, > + enum hwmon_sensor_types type, > + u32 attr, int channel, const char **str) > +{ > + const struct ssam_temp *ssam_temp = dev_get_drvdata(dev); > + > + *str = ssam_temp->names[channel]; > + return 0; > +} > + > static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = { > HWMON_CHANNEL_INFO(chip, > HWMON_C_REGISTER_TZ), > - /* We have at most 16 thermal sensor channels. */ > + /* > + * We have at most SSAM_TMP_SENSOR_MAX_COUNT = 16 thermal sensor > + * channels. > + */ Pointless comment. Already explained above, and perfect example showing why it has no value separating this and the previous patch. > HWMON_CHANNEL_INFO(temp, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT, > - HWMON_T_INPUT), > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL), Another example. Why have me review the previous patch just to change the code here ? [ ... ] > + /* Retrieve the name for each available sensor. */ > + for (channel = 0; channel < SSAM_TMP_SENSOR_MAX_COUNT; channel++) { > + if (!(sensors & BIT(channel))) > + continue; > + > + status = ssam_tmp_get_name(sdev, channel + 1, > + ssam_temp->names[channel], > + SSAM_TMP_SENSOR_NAME_LENGTH); > + if (status) > + return status; Your call to fail probe in this case just because it can not find a sensor name. I personally find that quite aggressive. Guenter
On 4/16/24 3:30 PM, Guenter Roeck wrote: > On Sat, Mar 30, 2024 at 12:24:01PM +0100, Maximilian Luz wrote: [...] >> +static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) >> +{ >> + struct ssam_tmp_get_name_rsp name_rsp; >> + int status; >> + >> + status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); >> + if (status) >> + return status; >> + >> + /* >> + * This should not fail unless the name in the returned struct is not >> + * null-terminated or someone changed something in the struct >> + * definitions above, since our buffer and struct have the same >> + * capacity by design. So if this fails blow this up with a warning. >> + * Since the more likely cause is that the returned string isn't >> + * null-terminated, we might have received garbage (as opposed to just >> + * an incomplete string), so also fail the function. >> + */ >> + status = strscpy(buf, name_rsp.name, buf_len); >> + WARN_ON(status < 0); > > Not acceptable. From include/asm-generic/bug.h: > > * Do not use these macros when checking for invalid external inputs > * (e.g. invalid system call arguments, or invalid data coming from > * network/devices), and on transient conditions like ENOMEM or EAGAIN. > * These macros should be used for recoverable kernel issues only. > Hmm, I always interpreted that as "do not use for checking user-defined input", which this is not. The reason I added/requested it here was to check for "bugs" in how we think the interface behaves (and our definitions related to it) as the interface was reverse-engineered. Generally, when this fails I expect that we made some mistake in our code (or the things we assume about the interface), which likely causes us to interpret the received data as "garbage" (and not the EC sending corrupted data, which it is generally not due to CRC checking and validation in the SAM driver). Hence, I personally would prefer if this blows up in a big warning with a trace attached to it, so that an end-user can easily report this to us and that we can appropriately deal with it. As opposed to some one-line error message that will likely get overlooked or not taken as seriously. If you still insist, I could change that to a dev_err() message. Or maybe make the comment a bit clearer. >> + >> + return status < 0 ? status : 0; >> +} >> + >> >> /* -- Driver.---------------------------------------------------------------- */ >> >> struct ssam_temp { >> struct ssam_device *sdev; >> s16 sensors; >> + char names[SSAM_TMP_SENSOR_MAX_COUNT][SSAM_TMP_SENSOR_NAME_LENGTH]; >> }; >> >> static umode_t ssam_temp_hwmon_is_visible(const void *data, >> @@ -83,33 +134,47 @@ static int ssam_temp_hwmon_read(struct device *dev, >> return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value); >> } >> >> +static int ssam_temp_hwmon_read_string(struct device *dev, >> + enum hwmon_sensor_types type, >> + u32 attr, int channel, const char **str) >> +{ >> + const struct ssam_temp *ssam_temp = dev_get_drvdata(dev); >> + >> + *str = ssam_temp->names[channel]; >> + return 0; >> +} >> + >> static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = { >> HWMON_CHANNEL_INFO(chip, >> HWMON_C_REGISTER_TZ), >> - /* We have at most 16 thermal sensor channels. */ >> + /* >> + * We have at most SSAM_TMP_SENSOR_MAX_COUNT = 16 thermal sensor >> + * channels. >> + */ > > Pointless comment. Already explained above, and perfect example showing > why it has no value separating this and the previous patch. I can remove the comment. The reason for it being two separate patches is to retain proper attribution. I am sorry that this has created more work for you. In short, Ivor reverse-engineered the interface calls to get the sensor names and wrote the vast majority of this patch (I only changed some smaller things and gave advice, hence the co-developed-by). Therefore I wanted to give proper attribution to Ivor for his work and not squash it into a single patch. >> HWMON_CHANNEL_INFO(temp, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT, >> - HWMON_T_INPUT), >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL, >> + HWMON_T_INPUT | HWMON_T_LABEL), > > Another example. Why have me review the previous patch > just to change the code here ? See above. > > [ ... ] > >> + /* Retrieve the name for each available sensor. */ >> + for (channel = 0; channel < SSAM_TMP_SENSOR_MAX_COUNT; channel++) { >> + if (!(sensors & BIT(channel))) >> + continue; >> + >> + status = ssam_tmp_get_name(sdev, channel + 1, >> + ssam_temp->names[channel], >> + SSAM_TMP_SENSOR_NAME_LENGTH); >> + if (status) >> + return status; > > Your call to fail probe in this case just because it can not find > a sensor name. I personally find that quite aggressive. We generally do not expect this to fail. And I think if this fails, it should either be something wrong with the EC communication (in a way that breaks other things like reading temperature values as well) or in our assumptions of how the interface works. So similar to above, I personally would prefer this to fail in a more noisy way, so that people are more likely to tell us about the failure. As an example: We expect sensor names and the interface for it to be present. However, maybe some device (either one we couldn't test or some future one) does not implement the interface call. Usually, an unsupported call results in a timeout error. So if we would just ignore that, the driver would load slow (as for each name it would wait for the timeout), but users might not notice as the temperature sensors can still be accessed normally after that. I do see that as an easily detectable bug. Letting it continue to load IMHO just creates a more subtle bug. As above, if you prefer I can change that. Just let me know. Thank you for your review. Best regards, Max
On Tue, Apr 16, 2024 at 09:00:05PM +0200, Maximilian Luz wrote: > On 4/16/24 3:30 PM, Guenter Roeck wrote: > > On Sat, Mar 30, 2024 at 12:24:01PM +0100, Maximilian Luz wrote: > > [...] > > > > +static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) > > > +{ > > > + struct ssam_tmp_get_name_rsp name_rsp; > > > + int status; > > > + > > > + status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); > > > + if (status) > > > + return status; > > > + > > > + /* > > > + * This should not fail unless the name in the returned struct is not > > > + * null-terminated or someone changed something in the struct > > > + * definitions above, since our buffer and struct have the same > > > + * capacity by design. So if this fails blow this up with a warning. > > > + * Since the more likely cause is that the returned string isn't > > > + * null-terminated, we might have received garbage (as opposed to just > > > + * an incomplete string), so also fail the function. > > > + */ > > > + status = strscpy(buf, name_rsp.name, buf_len); > > > + WARN_ON(status < 0); > > > > Not acceptable. From include/asm-generic/bug.h: > > > > * Do not use these macros when checking for invalid external inputs > > * (e.g. invalid system call arguments, or invalid data coming from > > * network/devices), and on transient conditions like ENOMEM or EAGAIN. > > * These macros should be used for recoverable kernel issues only. > > > > Hmm, I always interpreted that as "do not use for checking user-defined > input", which this is not. > "invalid data coming from network/devices" is not user-defined input. > The reason I added/requested it here was to check for "bugs" in how we > think the interface behaves (and our definitions related to it) as the > interface was reverse-engineered. Generally, when this fails I expect > that we made some mistake in our code (or the things we assume about the > interface), which likely causes us to interpret the received data as > "garbage" (and not the EC sending corrupted data, which it is generally > not due to CRC checking and validation in the SAM driver). Hence, I > personally would prefer if this blows up in a big warning with a trace > attached to it, so that an end-user can easily report this to us and > that we can appropriately deal with it. As opposed to some one-line > error message that will likely get overlooked or not taken as seriously. > I have heard the "This backtrace is absolutely essential" argument before, including the "will be fixed" part. Chromebooks report more than 500,000 warning backtraces _every single day_. None of them is getting fixed. > If you still insist, I could change that to a dev_err() message. Or > maybe make the comment a bit clearer. dev_err() would be acceptable. WARN() or WARN_ON() are no-go. Guenter
> Ivor reverse-engineered the interface calls to get the sensor > names and wrote the vast majority of this patch (I only changed some > smaller things and gave advice, hence the co-developed-by). Therefore I > wanted to give proper attribution to Ivor for his work and not squash it > into a single patch. By all means squash them in the next patch revision to make things simpler to review, I don't think it's a large enough piece of work to worry too much about attribution if it makes review more difficult. ~Ivor
On 4/16/24 11:34 PM, Ivor Wanders wrote: >> Ivor reverse-engineered the interface calls to get the sensor >> names and wrote the vast majority of this patch (I only changed some >> smaller things and gave advice, hence the co-developed-by). Therefore I >> wanted to give proper attribution to Ivor for his work and not squash it >> into a single patch. > > By all means squash them in the next patch revision to make things simpler > to review, I don't think it's a large enough piece of work to worry too > much about attribution if it makes review more difficult. Alright, I will do that then. Thanks Ivor! Best regards, Max
Hi, On 4/16/24 11:08 PM, Guenter Roeck wrote: > On Tue, Apr 16, 2024 at 09:00:05PM +0200, Maximilian Luz wrote: >> On 4/16/24 3:30 PM, Guenter Roeck wrote: >>> On Sat, Mar 30, 2024 at 12:24:01PM +0100, Maximilian Luz wrote: >> >> [...] >> >>>> +static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) >>>> +{ >>>> + struct ssam_tmp_get_name_rsp name_rsp; >>>> + int status; >>>> + >>>> + status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); >>>> + if (status) >>>> + return status; >>>> + >>>> + /* >>>> + * This should not fail unless the name in the returned struct is not >>>> + * null-terminated or someone changed something in the struct >>>> + * definitions above, since our buffer and struct have the same >>>> + * capacity by design. So if this fails blow this up with a warning. >>>> + * Since the more likely cause is that the returned string isn't >>>> + * null-terminated, we might have received garbage (as opposed to just >>>> + * an incomplete string), so also fail the function. >>>> + */ >>>> + status = strscpy(buf, name_rsp.name, buf_len); >>>> + WARN_ON(status < 0); >>> >>> Not acceptable. From include/asm-generic/bug.h: >>> >>> * Do not use these macros when checking for invalid external inputs >>> * (e.g. invalid system call arguments, or invalid data coming from >>> * network/devices), and on transient conditions like ENOMEM or EAGAIN. >>> * These macros should be used for recoverable kernel issues only. >>> >> >> Hmm, I always interpreted that as "do not use for checking user-defined >> input", which this is not. >> > > "invalid data coming from network/devices" is not user-defined input. > >> The reason I added/requested it here was to check for "bugs" in how we >> think the interface behaves (and our definitions related to it) as the >> interface was reverse-engineered. Generally, when this fails I expect >> that we made some mistake in our code (or the things we assume about the >> interface), which likely causes us to interpret the received data as >> "garbage" (and not the EC sending corrupted data, which it is generally >> not due to CRC checking and validation in the SAM driver). Hence, I >> personally would prefer if this blows up in a big warning with a trace >> attached to it, so that an end-user can easily report this to us and >> that we can appropriately deal with it. As opposed to some one-line >> error message that will likely get overlooked or not taken as seriously. >> > > I have heard the "This backtrace is absolutely essential" argument before, > including the "will be fixed" part. Chromebooks report more than 500,000 > warning backtraces _every single day_. None of them is getting fixed. > >> If you still insist, I could change that to a dev_err() message. Or >> maybe make the comment a bit clearer. > > dev_err() would be acceptable. WARN() or WARN_ON() are no-go. Sorry for the delayed response. I will change this to a dev_err() then and try to re-spin the patches this weekend. Best regards, Max
diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c index 48c3e826713f6..7a2e1f638336c 100644 --- a/drivers/hwmon/surface_temp.c +++ b/drivers/hwmon/surface_temp.c @@ -17,6 +17,27 @@ /* -- SAM interface. -------------------------------------------------------- */ +/* + * Available sensors are indicated by a 16-bit bitfield, where a 1 marks the + * presence of a sensor. So we have at most 16 possible sensors/channels. + */ +#define SSAM_TMP_SENSOR_MAX_COUNT 16 + +/* + * All names observed so far are 6 characters long, but there's only + * zeros after the name, so perhaps they can be longer. This number reflects + * the maximum zero-padded space observed in the returned buffer. + */ +#define SSAM_TMP_SENSOR_NAME_LENGTH 18 + +struct ssam_tmp_get_name_rsp { + __le16 unknown1; + char unknown2; + char name[SSAM_TMP_SENSOR_NAME_LENGTH]; +} __packed; + +static_assert(sizeof(struct ssam_tmp_get_name_rsp) == 21); + SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, { .target_category = SSAM_SSH_TC_TMP, .command_id = 0x04, @@ -27,6 +48,11 @@ SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, { .command_id = 0x01, }); +SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_name, struct ssam_tmp_get_name_rsp, { + .target_category = SSAM_SSH_TC_TMP, + .command_id = 0x0e, +}); + static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors) { __le16 sensors_le; @@ -54,12 +80,37 @@ static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temp return 0; } +static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) +{ + struct ssam_tmp_get_name_rsp name_rsp; + int status; + + status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); + if (status) + return status; + + /* + * This should not fail unless the name in the returned struct is not + * null-terminated or someone changed something in the struct + * definitions above, since our buffer and struct have the same + * capacity by design. So if this fails blow this up with a warning. + * Since the more likely cause is that the returned string isn't + * null-terminated, we might have received garbage (as opposed to just + * an incomplete string), so also fail the function. + */ + status = strscpy(buf, name_rsp.name, buf_len); + WARN_ON(status < 0); + + return status < 0 ? status : 0; +} + /* -- Driver.---------------------------------------------------------------- */ struct ssam_temp { struct ssam_device *sdev; s16 sensors; + char names[SSAM_TMP_SENSOR_MAX_COUNT][SSAM_TMP_SENSOR_NAME_LENGTH]; }; static umode_t ssam_temp_hwmon_is_visible(const void *data, @@ -83,33 +134,47 @@ static int ssam_temp_hwmon_read(struct device *dev, return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value); } +static int ssam_temp_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, const char **str) +{ + const struct ssam_temp *ssam_temp = dev_get_drvdata(dev); + + *str = ssam_temp->names[channel]; + return 0; +} + static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = { HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), - /* We have at most 16 thermal sensor channels. */ + /* + * We have at most SSAM_TMP_SENSOR_MAX_COUNT = 16 thermal sensor + * channels. + */ HWMON_CHANNEL_INFO(temp, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT, - HWMON_T_INPUT), + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), NULL }; static const struct hwmon_ops ssam_temp_hwmon_ops = { .is_visible = ssam_temp_hwmon_is_visible, .read = ssam_temp_hwmon_read, + .read_string = ssam_temp_hwmon_read_string, }; static const struct hwmon_chip_info ssam_temp_hwmon_chip_info = { @@ -122,6 +187,7 @@ static int ssam_temp_probe(struct ssam_device *sdev) struct ssam_temp *ssam_temp; struct device *hwmon_dev; s16 sensors; + int channel; int status; status = ssam_tmp_get_available_sensors(sdev, &sensors); @@ -135,6 +201,18 @@ static int ssam_temp_probe(struct ssam_device *sdev) ssam_temp->sdev = sdev; ssam_temp->sensors = sensors; + /* Retrieve the name for each available sensor. */ + for (channel = 0; channel < SSAM_TMP_SENSOR_MAX_COUNT; channel++) { + if (!(sensors & BIT(channel))) + continue; + + status = ssam_tmp_get_name(sdev, channel + 1, + ssam_temp->names[channel], + SSAM_TMP_SENSOR_NAME_LENGTH); + if (status) + return status; + } + hwmon_dev = devm_hwmon_device_register_with_info(&sdev->dev, "surface_thermal", ssam_temp, &ssam_temp_hwmon_chip_info, NULL);