diff mbox series

[v3,1/6] drm/xe/hwmon: Add HWMON infrastructure

Message ID 20230802135241.458855-2-badal.nilawar@intel.com (mailing list archive)
State Changes Requested
Headers show
Series Add HWMON support for DGFX | expand

Commit Message

Nilawar, Badal Aug. 2, 2023, 1:52 p.m. UTC
The xe HWMON module will be used to expose voltage, power and energy
values for dGfx. Here we set up xe hwmon infrastructure including xe
hwmon registration, basic data structures and functions.

v2:
  - Fix review comments (Riana)
v3:
  - %s/hwm_/hwmon/ (Matt Brost)
  - Use drmm_mutex_init (Matt Brost)
  - Print error value (Matt Brost)
  - %s/hwmon_drvdata/xe_hwmon_data/
  - Move rpm (xe_device_mem_access_get/put) calls
    to this patch (Matt Brost)

Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |   3 +
 drivers/gpu/drm/xe/xe_device.c       |   5 +
 drivers/gpu/drm/xe/xe_device_types.h |   2 +
 drivers/gpu/drm/xe/xe_hwmon.c        | 150 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_hwmon.h        |  22 ++++
 5 files changed, 182 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_hwmon.c
 create mode 100644 drivers/gpu/drm/xe/xe_hwmon.h

Comments

Guenter Roeck Aug. 2, 2023, 2:15 p.m. UTC | #1
On 8/2/23 06:52, Badal Nilawar wrote:
> The xe HWMON module will be used to expose voltage, power and energy
> values for dGfx. Here we set up xe hwmon infrastructure including xe
> hwmon registration, basic data structures and functions.
> 
> v2:
>    - Fix review comments (Riana)
> v3:
>    - %s/hwm_/hwmon/ (Matt Brost)
>    - Use drmm_mutex_init (Matt Brost)
>    - Print error value (Matt Brost)
>    - %s/hwmon_drvdata/xe_hwmon_data/
>    - Move rpm (xe_device_mem_access_get/put) calls
>      to this patch (Matt Brost)
> 
> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
> ---
>   drivers/gpu/drm/xe/Makefile          |   3 +
>   drivers/gpu/drm/xe/xe_device.c       |   5 +
>   drivers/gpu/drm/xe/xe_device_types.h |   2 +
>   drivers/gpu/drm/xe/xe_hwmon.c        | 150 +++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_hwmon.h        |  22 ++++
>   5 files changed, 182 insertions(+)
>   create mode 100644 drivers/gpu/drm/xe/xe_hwmon.c
>   create mode 100644 drivers/gpu/drm/xe/xe_hwmon.h
> 
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index 4ea9e3150c20..831be23e000b 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -116,6 +116,9 @@ xe-y += xe_bb.o \
>   	xe_wa.o \
>   	xe_wopcm.o
>   
> +# graphics hardware monitoring (HWMON) support
> +xe-$(CONFIG_HWMON) += xe_hwmon.o
> +
>   # i915 Display compat #defines and #includes
>   subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
>   	-I$(srctree)/$(src)/display/ext \
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 5409cf7895d3..01bd08812514 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -34,6 +34,7 @@
>   #include "xe_vm.h"
>   #include "xe_vm_madvise.h"
>   #include "xe_wait_user_fence.h"
> +#include "xe_hwmon.h"
>   
>   #ifdef CONFIG_LOCKDEP
>   struct lockdep_map xe_device_mem_access_lockdep_map = {
> @@ -335,6 +336,8 @@ int xe_device_probe(struct xe_device *xe)
>   
>   	xe_debugfs_register(xe);
>   
> +	xe_hwmon_register(xe);
> +
>   	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
>   	if (err)
>   		return err;
> @@ -361,6 +364,8 @@ static void xe_device_remove_display(struct xe_device *xe)
>   
>   void xe_device_remove(struct xe_device *xe)
>   {
> +	xe_hwmon_unregister(xe);
> +
>   	xe_device_remove_display(xe);
>   
>   	xe_display_unlink(xe);
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index b156f69d7320..dd06eba815ec 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -376,6 +376,8 @@ struct xe_device {
>   	 */
>   	struct task_struct *pm_callback_task;
>   
> +	struct xe_hwmon *hwmon;
> +
>   	/* private: */
>   
>   #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
> new file mode 100644
> index 000000000000..5e35128a61a8
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_hwmon.c
> @@ -0,0 +1,150 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#include <linux/hwmon.h>
> +
> +#include <drm/drm_managed.h>
> +#include "regs/xe_gt_regs.h"
> +#include "xe_device.h"
> +#include "xe_hwmon.h"
> +
> +struct xe_hwmon_data {
> +	struct device *hwmon_dev;
> +	struct xe_gt *gt;
> +	char name[12];
> +};
> +
> +struct xe_hwmon {
> +	struct xe_hwmon_data ddat;
> +	struct mutex hwmon_lock;
> +};
> +
> +static const struct hwmon_channel_info *hwmon_info[] = {
> +	NULL
> +};
> +
> +static umode_t
> +hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
> +		 u32 attr, int channel)
> +{
> +	struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
> +	int ret;
> +
> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
> +
> +	switch (type) {
> +	default:
> +		ret = 0;
> +		break;
> +	}
> +
> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
> +
> +	return ret;
> +}
> +
> +static int
> +hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
> +	   int channel, long *val)
> +{
> +	struct xe_hwmon_data *ddat = dev_get_drvdata(dev);
> +	int ret;
> +
> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
> +
> +	switch (type) {
> +	default:
> +		ret = -EOPNOTSUPP;
> +		break;
> +	}
> +
> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
> +
> +	return ret;
> +}
> +
> +static int
> +hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
> +	    int channel, long val)
> +{
> +	struct xe_hwmon_data *ddat = dev_get_drvdata(dev);
> +	int ret;
> +
> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
> +
> +	switch (type) {
> +	default:
> +		ret = -EOPNOTSUPP;
> +		break;
> +	}
> +
> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
> +
> +	return ret;
> +}
> +
> +static const struct hwmon_ops hwmon_ops = {
> +	.is_visible = hwmon_is_visible,
> +	.read = hwmon_read,
> +	.write = hwmon_write,
> +};
> +
> +static const struct hwmon_chip_info hwmon_chip_info = {
> +	.ops = &hwmon_ops,
> +	.info = hwmon_info,
> +};
> +
> +static void
> +hwmon_get_preregistration_info(struct xe_device *xe)
> +{
> +}
> +
> +void xe_hwmon_register(struct xe_device *xe)
> +{
> +	struct device *dev = xe->drm.dev;
> +	struct xe_hwmon *hwmon;
> +	struct device *hwmon_dev;
> +	struct xe_hwmon_data *ddat;
> +
> +	/* hwmon is available only for dGfx */
> +	if (!IS_DGFX(xe))
> +		return;
> +
> +	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
> +	if (!hwmon)
> +		return;
> +
> +	xe->hwmon = hwmon;
> +	drmm_mutex_init(&xe->drm, &hwmon->hwmon_lock);
> +
> +	ddat = &hwmon->ddat;
> +
> +	/* primary GT to access device level properties */
> +	ddat->gt = xe->tiles[0].primary_gt;
> +
> +	snprintf(ddat->name, sizeof(ddat->name), "xe");

Why not just pass "xe" as string to devm_hwmon_device_register_with_info() ?

> +
> +	hwmon_get_preregistration_info(xe);
> +
> +	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
> +
> +	/*  hwmon_dev points to device hwmon<i> */
> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
> +							 ddat,
> +							 &hwmon_chip_info,
> +							 NULL);
> +	if (IS_ERR(hwmon_dev)) {
> +		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
> +		xe->hwmon = NULL;
> +		return;
> +	}
> +

What is xe->hwmon used for other than for setting it to NULL
in xe_hwmon_unregister() ?

> +	ddat->hwmon_dev = hwmon_dev;
> +}
> +
> +void xe_hwmon_unregister(struct xe_device *xe)
> +{
> +	xe->hwmon = NULL;
> +}

Not sure I understand why this function is needed. Please explain.

> diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
> new file mode 100644
> index 000000000000..a078eeb0a68b
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_hwmon.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: MIT */
> +
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#ifndef __XE_HWMON_H__
> +#define __XE_HWMON_H__
> +
> +#include <linux/types.h>
> +
> +struct xe_device;
> +
> +#if IS_REACHABLE(CONFIG_HWMON)
> +void xe_hwmon_register(struct xe_device *xe);
> +void xe_hwmon_unregister(struct xe_device *xe);
> +#else
> +static inline void xe_hwmon_register(struct xe_device *xe) { };
> +static inline void xe_hwmon_unregister(struct xe_device *xe) { };
> +#endif
> +
> +#endif /* __XE_HWMON_H__ */
Andi Shyti Aug. 2, 2023, 10:40 p.m. UTC | #2
Hi Badal,

[...]

> +struct xe_hwmon_data {
> +	struct device *hwmon_dev;
> +	struct xe_gt *gt;
> +	char name[12];
> +};
> +
> +struct xe_hwmon {
> +	struct xe_hwmon_data ddat;
> +	struct mutex hwmon_lock;
> +};

why do we need two structures here? Can we merge them?

> +static const struct hwmon_channel_info *hwmon_info[] = {
> +	NULL
> +};

just:

  static const struct hwmon_channel_info *hwmon_info[] = { };

would do.

> +static umode_t
> +hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
> +		 u32 attr, int channel)
> +{
> +	struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
> +	int ret;
> +
> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
> +
> +	switch (type) {
> +	default:
> +		ret = 0;
> +		break;
> +	}
> +
> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
> +
> +	return ret;

OK... we are forced to go through the switch and initialize ret.
Would be nicer to initialize ret to '0'... but it's not
important, feel free to ignore this comment if the compiler
doesn't complain.

> +}

[...]

> +	/*  hwmon_dev points to device hwmon<i> */
> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
> +							 ddat,
> +							 &hwmon_chip_info,
> +							 NULL);
> +	if (IS_ERR(hwmon_dev)) {
> +		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));

I think this is better:

   drm_warn(&xe->drm, "Fail to register xe hwmon (%pe)\n", hwmon_dev);

> +		xe->hwmon = NULL;
> +		return;
> +	}
> +
> +	ddat->hwmon_dev = hwmon_dev;
> +}
> +
> +void xe_hwmon_unregister(struct xe_device *xe)
> +{
> +	xe->hwmon = NULL;

I think this is not necessary. Will xe check for hwmon at some
point?

Andi

> +}
Guenter Roeck Aug. 2, 2023, 11:11 p.m. UTC | #3
On 8/2/23 15:40, Andi Shyti wrote:
> Hi Badal,
> 
> [...]
> 
>> +struct xe_hwmon_data {
>> +	struct device *hwmon_dev;
>> +	struct xe_gt *gt;
>> +	char name[12];
>> +};
>> +
>> +struct xe_hwmon {
>> +	struct xe_hwmon_data ddat;
>> +	struct mutex hwmon_lock;
>> +};
> 
> why do we need two structures here? Can we merge them?
> 
>> +static const struct hwmon_channel_info *hwmon_info[] = {
>> +	NULL
>> +};
> 
> just:
> 
>    static const struct hwmon_channel_info *hwmon_info[] = { };
> 
> would do.
> 

What would be the point ? This is just a placeholder, and we need
a NULL entry at the end.

>> +static umode_t
>> +hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
>> +		 u32 attr, int channel)
>> +{
>> +	struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
>> +	int ret;
>> +
>> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
>> +
>> +	switch (type) {
>> +	default:
>> +		ret = 0;
>> +		break;
>> +	}
>> +
>> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
>> +
>> +	return ret;
> 
> OK... we are forced to go through the switch and initialize ret.
> Would be nicer to initialize ret to '0'... but it's not
> important, feel free to ignore this comment if the compiler
> doesn't complain.
> 
>> +}
> 
> [...]
> 
>> +	/*  hwmon_dev points to device hwmon<i> */
>> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
>> +							 ddat,
>> +							 &hwmon_chip_info,
>> +							 NULL);
>> +	if (IS_ERR(hwmon_dev)) {
>> +		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
> 
> I think this is better:
> 
>     drm_warn(&xe->drm, "Fail to register xe hwmon (%pe)\n", hwmon_dev);
> 
>> +		xe->hwmon = NULL;
>> +		return;
>> +	}
>> +
>> +	ddat->hwmon_dev = hwmon_dev;
>> +}
>> +
>> +void xe_hwmon_unregister(struct xe_device *xe)
>> +{
>> +	xe->hwmon = NULL;
> 
> I think this is not necessary. Will xe check for hwmon at some
> point?
> 
> Andi
> 
>> +}
Guenter Roeck Aug. 2, 2023, 11:12 p.m. UTC | #4
On 8/2/23 15:40, Andi Shyti wrote:
> Hi Badal,
> 
> [...]
> 
>> +struct xe_hwmon_data {
>> +	struct device *hwmon_dev;
>> +	struct xe_gt *gt;
>> +	char name[12];
>> +};
>> +
>> +struct xe_hwmon {
>> +	struct xe_hwmon_data ddat;
>> +	struct mutex hwmon_lock;
>> +};
> 
> why do we need two structures here? Can we merge them?
> 

A later patch adds multiple hwmon devices which makes use of it.
I think that is flawed, and I am not inclined to accept it.

Guenter

>> +static const struct hwmon_channel_info *hwmon_info[] = {
>> +	NULL
>> +};
> 
> just:
> 
>    static const struct hwmon_channel_info *hwmon_info[] = { };
> 
> would do.
> 
>> +static umode_t
>> +hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
>> +		 u32 attr, int channel)
>> +{
>> +	struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
>> +	int ret;
>> +
>> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
>> +
>> +	switch (type) {
>> +	default:
>> +		ret = 0;
>> +		break;
>> +	}
>> +
>> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
>> +
>> +	return ret;
> 
> OK... we are forced to go through the switch and initialize ret.
> Would be nicer to initialize ret to '0'... but it's not
> important, feel free to ignore this comment if the compiler
> doesn't complain.
> 
>> +}
> 
> [...]
> 
>> +	/*  hwmon_dev points to device hwmon<i> */
>> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
>> +							 ddat,
>> +							 &hwmon_chip_info,
>> +							 NULL);
>> +	if (IS_ERR(hwmon_dev)) {
>> +		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
> 
> I think this is better:
> 
>     drm_warn(&xe->drm, "Fail to register xe hwmon (%pe)\n", hwmon_dev);
> 
>> +		xe->hwmon = NULL;
>> +		return;
>> +	}
>> +
>> +	ddat->hwmon_dev = hwmon_dev;
>> +}
>> +
>> +void xe_hwmon_unregister(struct xe_device *xe)
>> +{
>> +	xe->hwmon = NULL;
> 
> I think this is not necessary. Will xe check for hwmon at some
> point?
> 
> Andi
> 
>> +}
Andi Shyti Aug. 2, 2023, 11:34 p.m. UTC | #5
Hi Guenter,

[...]

> > > +static const struct hwmon_channel_info *hwmon_info[] = {
> > > +	NULL
> > > +};
> > 
> > just:
> > 
> >    static const struct hwmon_channel_info *hwmon_info[] = { };
> > 
> > would do.
> > 
> 
> What would be the point ? This is just a placeholder, and we need
> a NULL entry at the end.

right... this series needs to be read from the end to the
beginning :)

Andi
Guenter Roeck Aug. 3, 2023, 12:06 a.m. UTC | #6
On 8/2/23 16:34, Andi Shyti wrote:
> Hi Guenter,
> 
> [...]
> 
>>>> +static const struct hwmon_channel_info *hwmon_info[] = {
>>>> +	NULL
>>>> +};
>>>
>>> just:
>>>
>>>     static const struct hwmon_channel_info *hwmon_info[] = { };
>>>
>>> would do.
>>>
>>
>> What would be the point ? This is just a placeholder, and we need
>> a NULL entry at the end.
> 
> right... this series needs to be read from the end to the
> beginning :)
> 

Or applied completely, review the result, and then dig back to
the individual patches to provide feedback. I got severely confused
when trying to review the series. I am not really sure if the split
into multiple patches is really making the review easier; it seems
to me it does the opposite.

Guenter
Nilawar, Badal Aug. 4, 2023, 1:19 p.m. UTC | #7
Hi Guenter,
On 03-08-2023 04:42, Guenter Roeck wrote:
> On 8/2/23 15:40, Andi Shyti wrote:
>> Hi Badal,
>>
>> [...]
>>
>>> +struct xe_hwmon_data {
>>> +    struct device *hwmon_dev;
>>> +    struct xe_gt *gt;
>>> +    char name[12];
>>> +};
>>> +
>>> +struct xe_hwmon {
>>> +    struct xe_hwmon_data ddat;
>>> +    struct mutex hwmon_lock;
>>> +};
>>
>> why do we need two structures here? Can we merge them?
>>
> 
> A later patch adds multiple hwmon devices which makes use of it.
> I think that is flawed, and I am not inclined to accept it.
Is there any obvious reason that there shouldn't be multiple devices? In 
i915 we are doing the same. 
https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3

Regards,
Badal
> 
> Guenter
> 
>>> +static const struct hwmon_channel_info *hwmon_info[] = {
>>> +    NULL
>>> +};
>>
>> just:
>>
>>    static const struct hwmon_channel_info *hwmon_info[] = { };
>>
>> would do.
>>
>>> +static umode_t
>>> +hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
>>> +         u32 attr, int channel)
>>> +{
>>> +    struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
>>> +    int ret;
>>> +
>>> +    xe_device_mem_access_get(gt_to_xe(ddat->gt));
>>> +
>>> +    switch (type) {
>>> +    default:
>>> +        ret = 0;
>>> +        break;
>>> +    }
>>> +
>>> +    xe_device_mem_access_put(gt_to_xe(ddat->gt));
>>> +
>>> +    return ret;
>>
>> OK... we are forced to go through the switch and initialize ret.
>> Would be nicer to initialize ret to '0'... but it's not
>> important, feel free to ignore this comment if the compiler
>> doesn't complain.
>>
>>> +}
>>
>> [...]
>>
>>> +    /*  hwmon_dev points to device hwmon<i> */
>>> +    hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
>>> +                             ddat,
>>> +                             &hwmon_chip_info,
>>> +                             NULL);
>>> +    if (IS_ERR(hwmon_dev)) {
>>> +        drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", 
>>> PTR_ERR(hwmon_dev));
>>
>> I think this is better:
>>
>>     drm_warn(&xe->drm, "Fail to register xe hwmon (%pe)\n", hwmon_dev);
>>
>>> +        xe->hwmon = NULL;
>>> +        return;
>>> +    }
>>> +
>>> +    ddat->hwmon_dev = hwmon_dev;
>>> +}
>>> +
>>> +void xe_hwmon_unregister(struct xe_device *xe)
>>> +{
>>> +    xe->hwmon = NULL;
>>
>> I think this is not necessary. Will xe check for hwmon at some
>> point?
>>
>> Andi
>>
>>> +}
>
Nilawar, Badal Aug. 4, 2023, 1:25 p.m. UTC | #8
On 03-08-2023 04:10, Andi Shyti wrote:
> Hi Badal,
> 
> [...]
> 
>> +struct xe_hwmon_data {
>> +	struct device *hwmon_dev;
>> +	struct xe_gt *gt;
>> +	char name[12];
>> +};
>> +
>> +struct xe_hwmon {
>> +	struct xe_hwmon_data ddat;
>> +	struct mutex hwmon_lock;
>> +};
> 
> why do we need two structures here? Can we merge them?
In my previous series I mentioned its require to hold multiple hwmon 
devices.
> 
>> +static const struct hwmon_channel_info *hwmon_info[] = {
>> +	NULL
>> +};
> 
> just:
> 
>    static const struct hwmon_channel_info *hwmon_info[] = { };
> 
> would do.
sure
> 
>> +static umode_t
>> +hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
>> +		 u32 attr, int channel)
>> +{
>> +	struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
>> +	int ret;
>> +
>> +	xe_device_mem_access_get(gt_to_xe(ddat->gt));
>> +
>> +	switch (type) {
>> +	default:
>> +		ret = 0;
>> +		break;
>> +	}
>> +
>> +	xe_device_mem_access_put(gt_to_xe(ddat->gt));
>> +
>> +	return ret;
> 
> OK... we are forced to go through the switch and initialize ret.
> Would be nicer to initialize ret to '0'... but it's not
> important, feel free to ignore this comment if the compiler
> doesn't complain.
> 
>> +}
> 
> [...]
> 
>> +	/*  hwmon_dev points to device hwmon<i> */
>> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
>> +							 ddat,
>> +							 &hwmon_chip_info,
>> +							 NULL);
>> +	if (IS_ERR(hwmon_dev)) {
>> +		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
> 
> I think this is better:
> 
>     drm_warn(&xe->drm, "Fail to register xe hwmon (%pe)\n", hwmon_dev);
Sure
> 
>> +		xe->hwmon = NULL;
>> +		return;
>> +	}
>> +
>> +	ddat->hwmon_dev = hwmon_dev;
>> +}
>> +
>> +void xe_hwmon_unregister(struct xe_device *xe)
>> +{
>> +	xe->hwmon = NULL;
> 
> I think this is not necessary. Will xe check for hwmon at some
> point?
Yes this not needed as we are using devm_hwmon* function to register 
hwmon but in i915 patches this was suggested for sanity. I will remove 
this function.

Regards,
Badal
> 
> Andi
> 
>> +}
Guenter Roeck Aug. 4, 2023, 2:26 p.m. UTC | #9
On 8/4/23 06:19, Nilawar, Badal wrote:
> 
> Hi Guenter,
> On 03-08-2023 04:42, Guenter Roeck wrote:
>> On 8/2/23 15:40, Andi Shyti wrote:
>>> Hi Badal,
>>>
>>> [...]
>>>
>>>> +struct xe_hwmon_data {
>>>> +    struct device *hwmon_dev;
>>>> +    struct xe_gt *gt;
>>>> +    char name[12];
>>>> +};
>>>> +
>>>> +struct xe_hwmon {
>>>> +    struct xe_hwmon_data ddat;
>>>> +    struct mutex hwmon_lock;
>>>> +};
>>>
>>> why do we need two structures here? Can we merge them?
>>>
>>
>> A later patch adds multiple hwmon devices which makes use of it.
>> I think that is flawed, and I am not inclined to accept it.
> Is there any obvious reason that there shouldn't be multiple devices? In i915 we are doing the same. https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
> 

Technically you can do whatever you like as long as the code doesn't reside
in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
either. i915 shouldn't do it, but I didn't realize what they are doing
at the time. Other drivers doing it wrong is not an argument. You can't
argue that you may drive faster than the speed limit because others do it
or because police didn't stop you last time you did either.

One chip, one hwmon device. Do you have separate parent devices for
all your hwmon devices ? If yes, you can argue that having multiple hwmon
devices make sense. If not, you can't.

Guenter
Nilawar, Badal Aug. 4, 2023, 2:36 p.m. UTC | #10
On 04-08-2023 19:56, Guenter Roeck wrote:
> On 8/4/23 06:19, Nilawar, Badal wrote:
>>
>> Hi Guenter,
>> On 03-08-2023 04:42, Guenter Roeck wrote:
>>> On 8/2/23 15:40, Andi Shyti wrote:
>>>> Hi Badal,
>>>>
>>>> [...]
>>>>
>>>>> +struct xe_hwmon_data {
>>>>> +    struct device *hwmon_dev;
>>>>> +    struct xe_gt *gt;
>>>>> +    char name[12];
>>>>> +};
>>>>> +
>>>>> +struct xe_hwmon {
>>>>> +    struct xe_hwmon_data ddat;
>>>>> +    struct mutex hwmon_lock;
>>>>> +};
>>>>
>>>> why do we need two structures here? Can we merge them?
>>>>
>>>
>>> A later patch adds multiple hwmon devices which makes use of it.
>>> I think that is flawed, and I am not inclined to accept it.
>> Is there any obvious reason that there shouldn't be multiple devices? 
>> In i915 we are doing the same. 
>> https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
>>
> 
> Technically you can do whatever you like as long as the code doesn't reside
> in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
> either. i915 shouldn't do it, but I didn't realize what they are doing
> at the time. Other drivers doing it wrong is not an argument. You can't
> argue that you may drive faster than the speed limit because others do it
> or because police didn't stop you last time you did either.
> 
> One chip, one hwmon device. Do you have separate parent devices for
> all your hwmon devices ? If yes, you can argue that having multiple hwmon
> devices make sense. If not, you can't.
Thanks for clarification. There is only one parent device. So will try 
to accommodate single hwmon device.

Regards,
Badal
> 
> Guenter
>
Nilawar, Badal Aug. 4, 2023, 2:43 p.m. UTC | #11
On 04-08-2023 19:56, Guenter Roeck wrote:
> On 8/4/23 06:19, Nilawar, Badal wrote:
>>
>> Hi Guenter,
>> On 03-08-2023 04:42, Guenter Roeck wrote:
>>> On 8/2/23 15:40, Andi Shyti wrote:
>>>> Hi Badal,
>>>>
>>>> [...]
>>>>
>>>>> +struct xe_hwmon_data {
>>>>> +    struct device *hwmon_dev;
>>>>> +    struct xe_gt *gt;
>>>>> +    char name[12];
>>>>> +};
>>>>> +
>>>>> +struct xe_hwmon {
>>>>> +    struct xe_hwmon_data ddat;
>>>>> +    struct mutex hwmon_lock;
>>>>> +};
>>>>
>>>> why do we need two structures here? Can we merge them?
>>>>
>>>
>>> A later patch adds multiple hwmon devices which makes use of it.
>>> I think that is flawed, and I am not inclined to accept it.
>> Is there any obvious reason that there shouldn't be multiple devices? 
>> In i915 we are doing the same. 
>> https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
>>
> 
> Technically you can do whatever you like as long as the code doesn't reside
> in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
> either. i915 shouldn't do it, but I didn't realize what they are doing
> at the time. Other drivers doing it wrong is not an argument. You can't
> argue that you may drive faster than the speed limit because others do it
> or because police didn't stop you last time you did either.
> 
> One chip, one hwmon device. Do you have separate parent devices for
> all your hwmon devices ? If yes, you can argue that having multiple hwmon
> devices make sense. If not, you can't.
Thanks for clarification. There is only one parent device, so will try 
to accommodate one hwmon device approach.

Regards,
Badal
> 
> Guenter
>
Rodrigo Vivi Aug. 8, 2023, 9:31 p.m. UTC | #12
On Fri, Aug 04, 2023 at 08:06:22PM +0530, Nilawar, Badal wrote:
> 
> 
> On 04-08-2023 19:56, Guenter Roeck wrote:
> > On 8/4/23 06:19, Nilawar, Badal wrote:
> > > 
> > > Hi Guenter,
> > > On 03-08-2023 04:42, Guenter Roeck wrote:
> > > > On 8/2/23 15:40, Andi Shyti wrote:
> > > > > Hi Badal,
> > > > > 
> > > > > [...]
> > > > > 
> > > > > > +struct xe_hwmon_data {
> > > > > > +    struct device *hwmon_dev;
> > > > > > +    struct xe_gt *gt;
> > > > > > +    char name[12];
> > > > > > +};
> > > > > > +
> > > > > > +struct xe_hwmon {
> > > > > > +    struct xe_hwmon_data ddat;
> > > > > > +    struct mutex hwmon_lock;
> > > > > > +};
> > > > > 
> > > > > why do we need two structures here? Can we merge them?
> > > > > 
> > > > 
> > > > A later patch adds multiple hwmon devices which makes use of it.
> > > > I think that is flawed, and I am not inclined to accept it.
> > > Is there any obvious reason that there shouldn't be multiple
> > > devices? In i915 we are doing the same.
> > > https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
> > > 
> > 
> > Technically you can do whatever you like as long as the code doesn't reside
> > in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
> > either. i915 shouldn't do it, but I didn't realize what they are doing
> > at the time. Other drivers doing it wrong is not an argument. You can't
> > argue that you may drive faster than the speed limit because others do it
> > or because police didn't stop you last time you did either.
> > 
> > One chip, one hwmon device. Do you have separate parent devices for
> > all your hwmon devices ? If yes, you can argue that having multiple hwmon
> > devices make sense. If not, you can't.
> Thanks for clarification. There is only one parent device. So will try to
> accommodate single hwmon device.

Well, it is one PCI device, but under 1 pci device we can have multiple "tiles"
that can duplicate many components. Inside each tile we can even have multiple
"gt"s.

But back to the tile, each tile has its own metrics. It's own power delivery,
own sensors and all. They can even be seen as independent devices from this
angle.

I'm afraid that the attempt to put everything as one device, but all the
entries duplicated per tile/gt we might end up with a messed api.

> 
> Regards,
> Badal
> > 
> > Guenter
> >
Guenter Roeck Aug. 8, 2023, 10:07 p.m. UTC | #13
On 8/8/23 14:31, Rodrigo Vivi wrote:
> On Fri, Aug 04, 2023 at 08:06:22PM +0530, Nilawar, Badal wrote:
>>
>>
>> On 04-08-2023 19:56, Guenter Roeck wrote:
>>> On 8/4/23 06:19, Nilawar, Badal wrote:
>>>>
>>>> Hi Guenter,
>>>> On 03-08-2023 04:42, Guenter Roeck wrote:
>>>>> On 8/2/23 15:40, Andi Shyti wrote:
>>>>>> Hi Badal,
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>>> +struct xe_hwmon_data {
>>>>>>> +    struct device *hwmon_dev;
>>>>>>> +    struct xe_gt *gt;
>>>>>>> +    char name[12];
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct xe_hwmon {
>>>>>>> +    struct xe_hwmon_data ddat;
>>>>>>> +    struct mutex hwmon_lock;
>>>>>>> +};
>>>>>>
>>>>>> why do we need two structures here? Can we merge them?
>>>>>>
>>>>>
>>>>> A later patch adds multiple hwmon devices which makes use of it.
>>>>> I think that is flawed, and I am not inclined to accept it.
>>>> Is there any obvious reason that there shouldn't be multiple
>>>> devices? In i915 we are doing the same.
>>>> https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
>>>>
>>>
>>> Technically you can do whatever you like as long as the code doesn't reside
>>> in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
>>> either. i915 shouldn't do it, but I didn't realize what they are doing
>>> at the time. Other drivers doing it wrong is not an argument. You can't
>>> argue that you may drive faster than the speed limit because others do it
>>> or because police didn't stop you last time you did either.
>>>
>>> One chip, one hwmon device. Do you have separate parent devices for
>>> all your hwmon devices ? If yes, you can argue that having multiple hwmon
>>> devices make sense. If not, you can't.
>> Thanks for clarification. There is only one parent device. So will try to
>> accommodate single hwmon device.
> 
> Well, it is one PCI device, but under 1 pci device we can have multiple "tiles"
> that can duplicate many components. Inside each tile we can even have multiple
> "gt"s.
> 
> But back to the tile, each tile has its own metrics. It's own power delivery,
> own sensors and all. They can even be seen as independent devices from this
> angle.
> 
> I'm afraid that the attempt to put everything as one device, but all the
> entries duplicated per tile/gt we might end up with a messed api.
> 

Your argument does not make sense. I am not asking to duplicate anything.

Guenter
Rodrigo Vivi Aug. 11, 2023, 4:01 p.m. UTC | #14
On Tue, Aug 08, 2023 at 03:07:43PM -0700, Guenter Roeck wrote:
> On 8/8/23 14:31, Rodrigo Vivi wrote:
> > On Fri, Aug 04, 2023 at 08:06:22PM +0530, Nilawar, Badal wrote:
> > > 
> > > 
> > > On 04-08-2023 19:56, Guenter Roeck wrote:
> > > > On 8/4/23 06:19, Nilawar, Badal wrote:
> > > > > 
> > > > > Hi Guenter,
> > > > > On 03-08-2023 04:42, Guenter Roeck wrote:
> > > > > > On 8/2/23 15:40, Andi Shyti wrote:
> > > > > > > Hi Badal,
> > > > > > > 
> > > > > > > [...]
> > > > > > > 
> > > > > > > > +struct xe_hwmon_data {
> > > > > > > > +    struct device *hwmon_dev;
> > > > > > > > +    struct xe_gt *gt;
> > > > > > > > +    char name[12];
> > > > > > > > +};
> > > > > > > > +
> > > > > > > > +struct xe_hwmon {
> > > > > > > > +    struct xe_hwmon_data ddat;
> > > > > > > > +    struct mutex hwmon_lock;
> > > > > > > > +};
> > > > > > > 
> > > > > > > why do we need two structures here? Can we merge them?
> > > > > > > 
> > > > > > 
> > > > > > A later patch adds multiple hwmon devices which makes use of it.
> > > > > > I think that is flawed, and I am not inclined to accept it.
> > > > > Is there any obvious reason that there shouldn't be multiple
> > > > > devices? In i915 we are doing the same.
> > > > > https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
> > > > > 
> > > > 
> > > > Technically you can do whatever you like as long as the code doesn't reside
> > > > in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
> > > > either. i915 shouldn't do it, but I didn't realize what they are doing
> > > > at the time. Other drivers doing it wrong is not an argument. You can't
> > > > argue that you may drive faster than the speed limit because others do it
> > > > or because police didn't stop you last time you did either.
> > > > 
> > > > One chip, one hwmon device. Do you have separate parent devices for
> > > > all your hwmon devices ? If yes, you can argue that having multiple hwmon
> > > > devices make sense. If not, you can't.
> > > Thanks for clarification. There is only one parent device. So will try to
> > > accommodate single hwmon device.
> > 
> > Well, it is one PCI device, but under 1 pci device we can have multiple "tiles"
> > that can duplicate many components. Inside each tile we can even have multiple
> > "gt"s.
> > 
> > But back to the tile, each tile has its own metrics. It's own power delivery,
> > own sensors and all. They can even be seen as independent devices from this
> > angle.
> > 
> > I'm afraid that the attempt to put everything as one device, but all the
> > entries duplicated per tile/gt we might end up with a messed api.
> > 
> 
> Your argument does not make sense. I am not asking to duplicate anything.

Okay, I'm sorry, maybe 'duplication' was a bad choice of words from my part.

You had told that having multiple hwmon device for a single chip was not
acceptable.

But I'm trying to explain that we have a hardware architecture where the graphics
is duplicated in 'tiles' inside the same PCI card. Each tile with its
own sensors and monitoring systems. And also an extra sensors monitoring the
entire 'package' that includes the tiles and the SoC.
So 1 hwmon device per gt-tile and package sound the appropriated way to me.

Your lines had convinced Badal to get them all and merge in a single hwmon
device. If we do this, the API will get messed up.

And this is what I meant by 'messed up':
quoting Badal:
"""
With single device energy entries will look like hwmonxx/energy1_input,
energy2_input, energy3_input.
To identify which entry for what need to expose additional entry energyX_lable
which will contain ("package", "gtN")
"""

I am arguing that for this tiled ('sub-device') hw architecture the
initial patch from Badal, that is the same way implemented in i915 is
absolutely the right way to go with the current infrastructure.

One idea that I had the first time that I saw this was if it wouldn't
be acceptable for instance in hwmon an infra with named (numbered?)
sub-directories inside the the hwmon device. But I couldn't not find any
other hwmon usage out there that could justify a big change in the core of
hwmon like that.

Well, but maybe we are indeed missing some other better way of handling
this sub-device case with the current hwmon infrastructure. If so, I'd
like to hear more about the suggestions and get some initial pointers.

Thanks so much for your time and help here,
Rodrigo.

> 
> Guenter
>
Guenter Roeck Aug. 11, 2023, 5:39 p.m. UTC | #15
On 8/11/23 09:01, Rodrigo Vivi wrote:
> On Tue, Aug 08, 2023 at 03:07:43PM -0700, Guenter Roeck wrote:
>> On 8/8/23 14:31, Rodrigo Vivi wrote:
>>> On Fri, Aug 04, 2023 at 08:06:22PM +0530, Nilawar, Badal wrote:
>>>>
>>>>
>>>> On 04-08-2023 19:56, Guenter Roeck wrote:
>>>>> On 8/4/23 06:19, Nilawar, Badal wrote:
>>>>>>
>>>>>> Hi Guenter,
>>>>>> On 03-08-2023 04:42, Guenter Roeck wrote:
>>>>>>> On 8/2/23 15:40, Andi Shyti wrote:
>>>>>>>> Hi Badal,
>>>>>>>>
>>>>>>>> [...]
>>>>>>>>
>>>>>>>>> +struct xe_hwmon_data {
>>>>>>>>> +    struct device *hwmon_dev;
>>>>>>>>> +    struct xe_gt *gt;
>>>>>>>>> +    char name[12];
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +struct xe_hwmon {
>>>>>>>>> +    struct xe_hwmon_data ddat;
>>>>>>>>> +    struct mutex hwmon_lock;
>>>>>>>>> +};
>>>>>>>>
>>>>>>>> why do we need two structures here? Can we merge them?
>>>>>>>>
>>>>>>>
>>>>>>> A later patch adds multiple hwmon devices which makes use of it.
>>>>>>> I think that is flawed, and I am not inclined to accept it.
>>>>>> Is there any obvious reason that there shouldn't be multiple
>>>>>> devices? In i915 we are doing the same.
>>>>>> https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
>>>>>>
>>>>>
>>>>> Technically you can do whatever you like as long as the code doesn't reside
>>>>> in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
>>>>> either. i915 shouldn't do it, but I didn't realize what they are doing
>>>>> at the time. Other drivers doing it wrong is not an argument. You can't
>>>>> argue that you may drive faster than the speed limit because others do it
>>>>> or because police didn't stop you last time you did either.
>>>>>
>>>>> One chip, one hwmon device. Do you have separate parent devices for
>>>>> all your hwmon devices ? If yes, you can argue that having multiple hwmon
>>>>> devices make sense. If not, you can't.
>>>> Thanks for clarification. There is only one parent device. So will try to
>>>> accommodate single hwmon device.
>>>
>>> Well, it is one PCI device, but under 1 pci device we can have multiple "tiles"
>>> that can duplicate many components. Inside each tile we can even have multiple
>>> "gt"s.
>>>
>>> But back to the tile, each tile has its own metrics. It's own power delivery,
>>> own sensors and all. They can even be seen as independent devices from this
>>> angle.
>>>
>>> I'm afraid that the attempt to put everything as one device, but all the
>>> entries duplicated per tile/gt we might end up with a messed api.
>>>
>>
>> Your argument does not make sense. I am not asking to duplicate anything.
> 
> Okay, I'm sorry, maybe 'duplication' was a bad choice of words from my part.
> 
> You had told that having multiple hwmon device for a single chip was not
> acceptable.
> 
> But I'm trying to explain that we have a hardware architecture where the graphics
> is duplicated in 'tiles' inside the same PCI card. Each tile with its
> own sensors and monitoring systems. And also an extra sensors monitoring the
> entire 'package' that includes the tiles and the SoC.
> So 1 hwmon device per gt-tile and package sound the appropriated way to me.
> 

No, it isn't. Next you are going to tell me to split CPU temperature devices
in the same way because they are split in "tiles" on the same CPU core.

> Your lines had convinced Badal to get them all and merge in a single hwmon
> device. If we do this, the API will get messed up.
> 
> And this is what I meant by 'messed up':
> quoting Badal:
> """
> With single device energy entries will look like hwmonxx/energy1_input,
> energy2_input, energy3_input.
> To identify which entry for what need to expose additional entry energyX_lable
> which will contain ("package", "gtN")

So what is the problem with that ? That is a description and not "messed up".

Guenter
Rodrigo Vivi Aug. 11, 2023, 6:48 p.m. UTC | #16
On Fri, Aug 11, 2023 at 10:39:00AM -0700, Guenter Roeck wrote:
> On 8/11/23 09:01, Rodrigo Vivi wrote:
> > On Tue, Aug 08, 2023 at 03:07:43PM -0700, Guenter Roeck wrote:
> > > On 8/8/23 14:31, Rodrigo Vivi wrote:
> > > > On Fri, Aug 04, 2023 at 08:06:22PM +0530, Nilawar, Badal wrote:
> > > > > 
> > > > > 
> > > > > On 04-08-2023 19:56, Guenter Roeck wrote:
> > > > > > On 8/4/23 06:19, Nilawar, Badal wrote:
> > > > > > > 
> > > > > > > Hi Guenter,
> > > > > > > On 03-08-2023 04:42, Guenter Roeck wrote:
> > > > > > > > On 8/2/23 15:40, Andi Shyti wrote:
> > > > > > > > > Hi Badal,
> > > > > > > > > 
> > > > > > > > > [...]
> > > > > > > > > 
> > > > > > > > > > +struct xe_hwmon_data {
> > > > > > > > > > +    struct device *hwmon_dev;
> > > > > > > > > > +    struct xe_gt *gt;
> > > > > > > > > > +    char name[12];
> > > > > > > > > > +};
> > > > > > > > > > +
> > > > > > > > > > +struct xe_hwmon {
> > > > > > > > > > +    struct xe_hwmon_data ddat;
> > > > > > > > > > +    struct mutex hwmon_lock;
> > > > > > > > > > +};
> > > > > > > > > 
> > > > > > > > > why do we need two structures here? Can we merge them?
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > A later patch adds multiple hwmon devices which makes use of it.
> > > > > > > > I think that is flawed, and I am not inclined to accept it.
> > > > > > > Is there any obvious reason that there shouldn't be multiple
> > > > > > > devices? In i915 we are doing the same.
> > > > > > > https://patchwork.freedesktop.org/patch/497324/?series=104278&rev=3
> > > > > > > 
> > > > > > 
> > > > > > Technically you can do whatever you like as long as the code doesn't reside
> > > > > > in drivers/hwmon. I won't NACK it, but I won't give it a Reviewed-by:
> > > > > > either. i915 shouldn't do it, but I didn't realize what they are doing
> > > > > > at the time. Other drivers doing it wrong is not an argument. You can't
> > > > > > argue that you may drive faster than the speed limit because others do it
> > > > > > or because police didn't stop you last time you did either.
> > > > > > 
> > > > > > One chip, one hwmon device. Do you have separate parent devices for
> > > > > > all your hwmon devices ? If yes, you can argue that having multiple hwmon
> > > > > > devices make sense. If not, you can't.
> > > > > Thanks for clarification. There is only one parent device. So will try to
> > > > > accommodate single hwmon device.
> > > > 
> > > > Well, it is one PCI device, but under 1 pci device we can have multiple "tiles"
> > > > that can duplicate many components. Inside each tile we can even have multiple
> > > > "gt"s.
> > > > 
> > > > But back to the tile, each tile has its own metrics. It's own power delivery,
> > > > own sensors and all. They can even be seen as independent devices from this
> > > > angle.
> > > > 
> > > > I'm afraid that the attempt to put everything as one device, but all the
> > > > entries duplicated per tile/gt we might end up with a messed api.
> > > > 
> > > 
> > > Your argument does not make sense. I am not asking to duplicate anything.
> > 
> > Okay, I'm sorry, maybe 'duplication' was a bad choice of words from my part.
> > 
> > You had told that having multiple hwmon device for a single chip was not
> > acceptable.
> > 
> > But I'm trying to explain that we have a hardware architecture where the graphics
> > is duplicated in 'tiles' inside the same PCI card. Each tile with its
> > own sensors and monitoring systems. And also an extra sensors monitoring the
> > entire 'package' that includes the tiles and the SoC.
> > So 1 hwmon device per gt-tile and package sound the appropriated way to me.
> > 
> 
> No, it isn't. Next you are going to tell me to split CPU temperature devices
> in the same way because they are split in "tiles" on the same CPU core.

okay, let's align with coretemp then.

> 
> > Your lines had convinced Badal to get them all and merge in a single hwmon
> > device. If we do this, the API will get messed up.
> > 
> > And this is what I meant by 'messed up':
> > quoting Badal:
> > """
> > With single device energy entries will look like hwmonxx/energy1_input,
> > energy2_input, energy3_input.
> > To identify which entry for what need to expose additional entry energyX_lable
> > which will contain ("package", "gtN")
> 
> So what is the problem with that ? That is a description and not "messed up".

From the user space perspective it would be easier to get an unique handle
for the subdevice (directory) and then inspect each property (files) with single
and direct access, without having to inspect every single 'label' file of that
property in the directory to match the desired sub-device.

But nevermind. Let's have a single hwmon per device with multiple label files
and aligning with cputemp.

> 
> Guenter
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 4ea9e3150c20..831be23e000b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -116,6 +116,9 @@  xe-y += xe_bb.o \
 	xe_wa.o \
 	xe_wopcm.o
 
+# graphics hardware monitoring (HWMON) support
+xe-$(CONFIG_HWMON) += xe_hwmon.o
+
 # i915 Display compat #defines and #includes
 subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
 	-I$(srctree)/$(src)/display/ext \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5409cf7895d3..01bd08812514 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -34,6 +34,7 @@ 
 #include "xe_vm.h"
 #include "xe_vm_madvise.h"
 #include "xe_wait_user_fence.h"
+#include "xe_hwmon.h"
 
 #ifdef CONFIG_LOCKDEP
 struct lockdep_map xe_device_mem_access_lockdep_map = {
@@ -335,6 +336,8 @@  int xe_device_probe(struct xe_device *xe)
 
 	xe_debugfs_register(xe);
 
+	xe_hwmon_register(xe);
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
 	if (err)
 		return err;
@@ -361,6 +364,8 @@  static void xe_device_remove_display(struct xe_device *xe)
 
 void xe_device_remove(struct xe_device *xe)
 {
+	xe_hwmon_unregister(xe);
+
 	xe_device_remove_display(xe);
 
 	xe_display_unlink(xe);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index b156f69d7320..dd06eba815ec 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -376,6 +376,8 @@  struct xe_device {
 	 */
 	struct task_struct *pm_callback_task;
 
+	struct xe_hwmon *hwmon;
+
 	/* private: */
 
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
new file mode 100644
index 000000000000..5e35128a61a8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -0,0 +1,150 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/hwmon.h>
+
+#include <drm/drm_managed.h>
+#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
+#include "xe_hwmon.h"
+
+struct xe_hwmon_data {
+	struct device *hwmon_dev;
+	struct xe_gt *gt;
+	char name[12];
+};
+
+struct xe_hwmon {
+	struct xe_hwmon_data ddat;
+	struct mutex hwmon_lock;
+};
+
+static const struct hwmon_channel_info *hwmon_info[] = {
+	NULL
+};
+
+static umode_t
+hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+		 u32 attr, int channel)
+{
+	struct xe_hwmon_data *ddat = (struct xe_hwmon_data *)drvdata;
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(ddat->gt));
+
+	switch (type) {
+	default:
+		ret = 0;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(ddat->gt));
+
+	return ret;
+}
+
+static int
+hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	   int channel, long *val)
+{
+	struct xe_hwmon_data *ddat = dev_get_drvdata(dev);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(ddat->gt));
+
+	switch (type) {
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(ddat->gt));
+
+	return ret;
+}
+
+static int
+hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	    int channel, long val)
+{
+	struct xe_hwmon_data *ddat = dev_get_drvdata(dev);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(ddat->gt));
+
+	switch (type) {
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(ddat->gt));
+
+	return ret;
+}
+
+static const struct hwmon_ops hwmon_ops = {
+	.is_visible = hwmon_is_visible,
+	.read = hwmon_read,
+	.write = hwmon_write,
+};
+
+static const struct hwmon_chip_info hwmon_chip_info = {
+	.ops = &hwmon_ops,
+	.info = hwmon_info,
+};
+
+static void
+hwmon_get_preregistration_info(struct xe_device *xe)
+{
+}
+
+void xe_hwmon_register(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	struct xe_hwmon *hwmon;
+	struct device *hwmon_dev;
+	struct xe_hwmon_data *ddat;
+
+	/* hwmon is available only for dGfx */
+	if (!IS_DGFX(xe))
+		return;
+
+	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+	if (!hwmon)
+		return;
+
+	xe->hwmon = hwmon;
+	drmm_mutex_init(&xe->drm, &hwmon->hwmon_lock);
+
+	ddat = &hwmon->ddat;
+
+	/* primary GT to access device level properties */
+	ddat->gt = xe->tiles[0].primary_gt;
+
+	snprintf(ddat->name, sizeof(ddat->name), "xe");
+
+	hwmon_get_preregistration_info(xe);
+
+	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
+
+	/*  hwmon_dev points to device hwmon<i> */
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
+							 ddat,
+							 &hwmon_chip_info,
+							 NULL);
+	if (IS_ERR(hwmon_dev)) {
+		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
+		xe->hwmon = NULL;
+		return;
+	}
+
+	ddat->hwmon_dev = hwmon_dev;
+}
+
+void xe_hwmon_unregister(struct xe_device *xe)
+{
+	xe->hwmon = NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
new file mode 100644
index 000000000000..a078eeb0a68b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_HWMON_H__
+#define __XE_HWMON_H__
+
+#include <linux/types.h>
+
+struct xe_device;
+
+#if IS_REACHABLE(CONFIG_HWMON)
+void xe_hwmon_register(struct xe_device *xe);
+void xe_hwmon_unregister(struct xe_device *xe);
+#else
+static inline void xe_hwmon_register(struct xe_device *xe) { };
+static inline void xe_hwmon_unregister(struct xe_device *xe) { };
+#endif
+
+#endif /* __XE_HWMON_H__ */