diff mbox series

[4/6] hwmon: amd_energy: let user enable/disable the sw accumulation

Message ID 20200905143230.195049-5-nchatrad@amd.com (mailing list archive)
State Changes Requested
Headers show
Series RFC: hwmon: few improvements to amd_energy driver | expand

Commit Message

Naveen Krishna Chatradhi Sept. 5, 2020, 2:32 p.m. UTC
Provide an option "accumulator_status" via sysfs to enable/disable
the software accumulation of energy counters.

Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
 drivers/hwmon/amd_energy.c | 104 ++++++++++++++++++++++++++++++-------
 1 file changed, 86 insertions(+), 18 deletions(-)

Comments

Guenter Roeck Sept. 5, 2020, 3:17 p.m. UTC | #1
On 9/5/20 7:32 AM, Naveen Krishna Chatradhi wrote:
> Provide an option "accumulator_status" via sysfs to enable/disable
> the software accumulation of energy counters.
> 
> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>

I am quite substantially opposed to this. I'd be willing to
accept a module parameter. However, the code is there, and it is
enabled by default, and it should stay enabled by default.
I don't want to have to deal with people complaining that
it suddenly no longer works.

Also, there needs to be an explanation for why this is needed.

Guenter

> ---
>  drivers/hwmon/amd_energy.c | 104 ++++++++++++++++++++++++++++++-------
>  1 file changed, 86 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> index 96c61784d05c..c294bea56c02 100644
> --- a/drivers/hwmon/amd_energy.c
> +++ b/drivers/hwmon/amd_energy.c
> @@ -32,6 +32,8 @@
>  #define AMD_ENERGY_UNIT_MASK	0x01F00
>  #define AMD_ENERGY_MASK		0xFFFFFFFF
>  
> +static struct device_attribute accumulate_attr;
> +
>  struct sensor_accumulator {
>  	u64 energy_ctr;
>  	u64 prev_value;
> @@ -42,10 +44,12 @@ struct amd_energy_data {
>  	const struct hwmon_channel_info *info[2];
>  	struct hwmon_chip_info chip;
>  	struct task_struct *wrap_accumulate;
> +	struct device *hwmon_dev;
>  	/* Lock around the accumulator */
>  	struct mutex lock;
>  	/* An accumulator for each core and socket */
>  	struct sensor_accumulator *accums;
> +	bool accumulator_status;
>  	/* Energy Status Units */
>  	u64 energy_units;
>  	unsigned int timeout;
> @@ -128,13 +132,15 @@ static void amd_add_delta(struct amd_energy_data *data, int ch,
>  	rdmsrl_safe_on_cpu(cpu, reg, &input);
>  	input &= AMD_ENERGY_MASK;
>  
> -	accum = &data->accums[ch];
> -	if (input >= accum->prev_value)
> -		input += accum->energy_ctr -
> -				accum->prev_value;
> -	else
> -		input += UINT_MAX - accum->prev_value +
> -				accum->energy_ctr;
> +	if (data->accumulator_status) {
> +		accum = &data->accums[ch];
> +		if (input >= accum->prev_value)
> +			input += accum->energy_ctr -
> +					accum->prev_value;
> +		else
> +			input += UINT_MAX - accum->prev_value +
> +					accum->energy_ctr;
> +	}
>  
>  	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */
>  	*val = div64_ul(input * 1000000UL, BIT(data->energy_units));
> @@ -264,9 +270,67 @@ static int amd_create_sensor(struct device *dev,
>  	return 0;
>  }
>  
> +static ssize_t amd_energy_accumulate_show(struct device *dev,
> +					  struct device_attribute *dev_attr,
> +					  char *buf)
> +{
> +	struct amd_energy_data *data = dev_get_drvdata(dev);
> +
> +	return sprintf(buf, "%d\n", data->accumulator_status);
> +}
> +
> +static ssize_t amd_energy_accumulate_store(struct device *dev,
> +					   struct device_attribute *dev_attr,
> +					   const char *buf, size_t count)
> +{
> +	struct amd_energy_data *data = dev_get_drvdata(dev);
> +	bool input;
> +	int ret;
> +
> +	ret = kstrtobool(buf, &input);
> +	if (ret)
> +		return ret;
> +
> +	if (data->accumulator_status == input)
> +		return count;
> +
> +	if (input) {
> +		memset(data->accums, 0, (data->nr_cpus + data->nr_socks) *
> +			sizeof(struct sensor_accumulator));
> +
> +		if (!data->wrap_accumulate) {
> +			data->wrap_accumulate =
> +				kthread_run(energy_accumulator,
> +					    data, "%s", dev_name(dev));
> +			if (IS_ERR(data->wrap_accumulate))
> +				return PTR_ERR(data->wrap_accumulate);
> +		}
> +	} else {
> +		if (data && data->wrap_accumulate) {
> +			ret = kthread_stop(data->wrap_accumulate);
> +			if (ret)
> +				return ret;
> +			data->wrap_accumulate = NULL;
> +		}
> +	}
> +	data->accumulator_status = input;
> +
> +	return count;
> +}
> +
> +static int create_accumulate_status_file(struct amd_energy_data *data)
> +{
> +	accumulate_attr.attr.name = "accumulator_status";
> +	accumulate_attr.attr.mode = 0664;
> +	accumulate_attr.show = amd_energy_accumulate_show;
> +	accumulate_attr.store = amd_energy_accumulate_store;
> +
> +	return sysfs_create_file(&data->hwmon_dev->kobj,
> +				 &accumulate_attr.attr);
> +}
> +
>  static int amd_energy_probe(struct platform_device *pdev)
>  {
> -	struct device *hwmon_dev;
>  	struct amd_energy_data *data;
>  	struct device *dev = &pdev->dev;
>  	int ret;
> @@ -290,12 +354,12 @@ static int amd_energy_probe(struct platform_device *pdev)
>  	mutex_init(&data->lock);
>  	get_energy_units(data);
>  
> -	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> -							 data,
> -							 &data->chip,
> -							 NULL);
> -	if (IS_ERR(hwmon_dev))
> -		return PTR_ERR(hwmon_dev);
> +	data->hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> +							       data,
> +							       &data->chip,
> +							       NULL);
> +	if (IS_ERR(data->hwmon_dev))
> +		return PTR_ERR(data->hwmon_dev);
>  
>  	/* Once in 3 minutes for a resolution of 1/2*16 */
>  	if (data->energy_units == 0x10)
> @@ -305,10 +369,12 @@ static int amd_energy_probe(struct platform_device *pdev)
>  	if (data->energy_units == 0x6)
>  		data->timeout = 3 * 24 * 60 * 60;
>  
> -	data->wrap_accumulate = kthread_run(energy_accumulator, data,
> -					    "%s", dev_name(hwmon_dev));
> -	if (IS_ERR(data->wrap_accumulate))
> -		return PTR_ERR(data->wrap_accumulate);
> +	/* Disabling the energy accumulation by default */
> +	data->accumulator_status = 0;
> +
> +	ret = create_accumulate_status_file(data);
> +	if (ret)
> +		return ret;
>  
>  	return 0;
>  }
> @@ -317,6 +383,8 @@ static int amd_energy_remove(struct platform_device *pdev)
>  {
>  	struct amd_energy_data *data = dev_get_drvdata(&pdev->dev);
>  
> +	sysfs_remove_file(&data->hwmon_dev->kobj, &accumulate_attr.attr);
> +
>  	if (data && data->wrap_accumulate)
>  		kthread_stop(data->wrap_accumulate);
>  
>
Guenter Roeck Sept. 5, 2020, 3:33 p.m. UTC | #2
On 9/5/20 8:17 AM, Guenter Roeck wrote:
> On 9/5/20 7:32 AM, Naveen Krishna Chatradhi wrote:
>> Provide an option "accumulator_status" via sysfs to enable/disable
>> the software accumulation of energy counters.
>>
>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> 
> I am quite substantially opposed to this. I'd be willing to
> accept a module parameter. However, the code is there, and it is
> enabled by default, and it should stay enabled by default.
> I don't want to have to deal with people complaining that
> it suddenly no longer works.
> 
> Also, there needs to be an explanation for why this is needed.
> 

No, wait, without accumulator this driver has zero value.
Users should just not load the driver if they don't want the
overhead associated with accumulation.

Guenter

> Guenter
> 
>> ---
>>  drivers/hwmon/amd_energy.c | 104 ++++++++++++++++++++++++++++++-------
>>  1 file changed, 86 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
>> index 96c61784d05c..c294bea56c02 100644
>> --- a/drivers/hwmon/amd_energy.c
>> +++ b/drivers/hwmon/amd_energy.c
>> @@ -32,6 +32,8 @@
>>  #define AMD_ENERGY_UNIT_MASK	0x01F00
>>  #define AMD_ENERGY_MASK		0xFFFFFFFF
>>  
>> +static struct device_attribute accumulate_attr;
>> +
>>  struct sensor_accumulator {
>>  	u64 energy_ctr;
>>  	u64 prev_value;
>> @@ -42,10 +44,12 @@ struct amd_energy_data {
>>  	const struct hwmon_channel_info *info[2];
>>  	struct hwmon_chip_info chip;
>>  	struct task_struct *wrap_accumulate;
>> +	struct device *hwmon_dev;
>>  	/* Lock around the accumulator */
>>  	struct mutex lock;
>>  	/* An accumulator for each core and socket */
>>  	struct sensor_accumulator *accums;
>> +	bool accumulator_status;
>>  	/* Energy Status Units */
>>  	u64 energy_units;
>>  	unsigned int timeout;
>> @@ -128,13 +132,15 @@ static void amd_add_delta(struct amd_energy_data *data, int ch,
>>  	rdmsrl_safe_on_cpu(cpu, reg, &input);
>>  	input &= AMD_ENERGY_MASK;
>>  
>> -	accum = &data->accums[ch];
>> -	if (input >= accum->prev_value)
>> -		input += accum->energy_ctr -
>> -				accum->prev_value;
>> -	else
>> -		input += UINT_MAX - accum->prev_value +
>> -				accum->energy_ctr;
>> +	if (data->accumulator_status) {
>> +		accum = &data->accums[ch];
>> +		if (input >= accum->prev_value)
>> +			input += accum->energy_ctr -
>> +					accum->prev_value;
>> +		else
>> +			input += UINT_MAX - accum->prev_value +
>> +					accum->energy_ctr;
>> +	}
>>  
>>  	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */
>>  	*val = div64_ul(input * 1000000UL, BIT(data->energy_units));
>> @@ -264,9 +270,67 @@ static int amd_create_sensor(struct device *dev,
>>  	return 0;
>>  }
>>  
>> +static ssize_t amd_energy_accumulate_show(struct device *dev,
>> +					  struct device_attribute *dev_attr,
>> +					  char *buf)
>> +{
>> +	struct amd_energy_data *data = dev_get_drvdata(dev);
>> +
>> +	return sprintf(buf, "%d\n", data->accumulator_status);
>> +}
>> +
>> +static ssize_t amd_energy_accumulate_store(struct device *dev,
>> +					   struct device_attribute *dev_attr,
>> +					   const char *buf, size_t count)
>> +{
>> +	struct amd_energy_data *data = dev_get_drvdata(dev);
>> +	bool input;
>> +	int ret;
>> +
>> +	ret = kstrtobool(buf, &input);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (data->accumulator_status == input)
>> +		return count;
>> +
>> +	if (input) {
>> +		memset(data->accums, 0, (data->nr_cpus + data->nr_socks) *
>> +			sizeof(struct sensor_accumulator));
>> +
>> +		if (!data->wrap_accumulate) {
>> +			data->wrap_accumulate =
>> +				kthread_run(energy_accumulator,
>> +					    data, "%s", dev_name(dev));
>> +			if (IS_ERR(data->wrap_accumulate))
>> +				return PTR_ERR(data->wrap_accumulate);
>> +		}
>> +	} else {
>> +		if (data && data->wrap_accumulate) {
>> +			ret = kthread_stop(data->wrap_accumulate);
>> +			if (ret)
>> +				return ret;
>> +			data->wrap_accumulate = NULL;
>> +		}
>> +	}
>> +	data->accumulator_status = input;
>> +
>> +	return count;
>> +}
>> +
>> +static int create_accumulate_status_file(struct amd_energy_data *data)
>> +{
>> +	accumulate_attr.attr.name = "accumulator_status";
>> +	accumulate_attr.attr.mode = 0664;
>> +	accumulate_attr.show = amd_energy_accumulate_show;
>> +	accumulate_attr.store = amd_energy_accumulate_store;
>> +
>> +	return sysfs_create_file(&data->hwmon_dev->kobj,
>> +				 &accumulate_attr.attr);
>> +}
>> +
>>  static int amd_energy_probe(struct platform_device *pdev)
>>  {
>> -	struct device *hwmon_dev;
>>  	struct amd_energy_data *data;
>>  	struct device *dev = &pdev->dev;
>>  	int ret;
>> @@ -290,12 +354,12 @@ static int amd_energy_probe(struct platform_device *pdev)
>>  	mutex_init(&data->lock);
>>  	get_energy_units(data);
>>  
>> -	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
>> -							 data,
>> -							 &data->chip,
>> -							 NULL);
>> -	if (IS_ERR(hwmon_dev))
>> -		return PTR_ERR(hwmon_dev);
>> +	data->hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
>> +							       data,
>> +							       &data->chip,
>> +							       NULL);
>> +	if (IS_ERR(data->hwmon_dev))
>> +		return PTR_ERR(data->hwmon_dev);
>>  
>>  	/* Once in 3 minutes for a resolution of 1/2*16 */
>>  	if (data->energy_units == 0x10)
>> @@ -305,10 +369,12 @@ static int amd_energy_probe(struct platform_device *pdev)
>>  	if (data->energy_units == 0x6)
>>  		data->timeout = 3 * 24 * 60 * 60;
>>  
>> -	data->wrap_accumulate = kthread_run(energy_accumulator, data,
>> -					    "%s", dev_name(hwmon_dev));
>> -	if (IS_ERR(data->wrap_accumulate))
>> -		return PTR_ERR(data->wrap_accumulate);
>> +	/* Disabling the energy accumulation by default */
>> +	data->accumulator_status = 0;
>> +
>> +	ret = create_accumulate_status_file(data);
>> +	if (ret)
>> +		return ret;
>>  
>>  	return 0;
>>  }
>> @@ -317,6 +383,8 @@ static int amd_energy_remove(struct platform_device *pdev)
>>  {
>>  	struct amd_energy_data *data = dev_get_drvdata(&pdev->dev);
>>  
>> +	sysfs_remove_file(&data->hwmon_dev->kobj, &accumulate_attr.attr);
>> +
>>  	if (data && data->wrap_accumulate)
>>  		kthread_stop(data->wrap_accumulate);
>>  
>>
>
Naveen Krishna Ch Sept. 8, 2020, 4:21 p.m. UTC | #3
Hi Guenter

On Tue, 8 Sep 2020 at 21:42, Chatradhi, Naveen Krishna
<NaveenKrishna.Chatradhi@amd.com> wrote:
>
> [AMD Public Use]
>
> [CAUTION: External Email]
>
> On 9/5/20 8:17 AM, Guenter Roeck wrote:
> > On 9/5/20 7:32 AM, Naveen Krishna Chatradhi wrote:
> >> Provide an option "accumulator_status" via sysfs to enable/disable
> >> the software accumulation of energy counters.
> >>
> >> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> >
> > I am quite substantially opposed to this. I'd be willing to accept a
> > module parameter. However, the code is there, and it is enabled by
> > default, and it should stay enabled by default.
Sure, I will change it back.

> > I don't want to have to deal with people complaining that it suddenly
> > no longer works.
Understood.
> >
> > Also, there needs to be an explanation for why this is needed.
> >
>
> No, wait, without accumulator this driver has zero value.
> Users should just not load the driver if they don't want the overhead associated with accumulation.

The use case we have is to provide an interface to enable/disable
accumulation by the admins, without having to reboot or install
something.
A userspace API is provided which will be used by the applications.

>
> Guenter
>
> > Guenter
> >
> >> ---
> >>  drivers/hwmon/amd_energy.c | 104
> >> ++++++++++++++++++++++++++++++-------
> >>  1 file changed, 86 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> >> index 96c61784d05c..c294bea56c02 100644
> >> --- a/drivers/hwmon/amd_energy.c
> >> +++ b/drivers/hwmon/amd_energy.c
> >> @@ -32,6 +32,8 @@
> >>  #define AMD_ENERGY_UNIT_MASK        0x01F00
> >>  #define AMD_ENERGY_MASK             0xFFFFFFFF
> >>
> >> +static struct device_attribute accumulate_attr;
> >> +
> >>  struct sensor_accumulator {
> >>      u64 energy_ctr;
> >>      u64 prev_value;
> >> @@ -42,10 +44,12 @@ struct amd_energy_data {
> >>      const struct hwmon_channel_info *info[2];
> >>      struct hwmon_chip_info chip;
> >>      struct task_struct *wrap_accumulate;
> >> +    struct device *hwmon_dev;
> >>      /* Lock around the accumulator */
> >>      struct mutex lock;
> >>      /* An accumulator for each core and socket */
> >>      struct sensor_accumulator *accums;
> >> +    bool accumulator_status;
> >>      /* Energy Status Units */
> >>      u64 energy_units;
> >>      unsigned int timeout;
> >> @@ -128,13 +132,15 @@ static void amd_add_delta(struct amd_energy_data *data, int ch,
> >>      rdmsrl_safe_on_cpu(cpu, reg, &input);
> >>      input &= AMD_ENERGY_MASK;
> >>
> >> -    accum = &data->accums[ch];
> >> -    if (input >= accum->prev_value)
> >> -            input += accum->energy_ctr -
> >> -                            accum->prev_value;
> >> -    else
> >> -            input += UINT_MAX - accum->prev_value +
> >> -                            accum->energy_ctr;
> >> +    if (data->accumulator_status) {
> >> +            accum = &data->accums[ch];
> >> +            if (input >= accum->prev_value)
> >> +                    input += accum->energy_ctr -
> >> +                                    accum->prev_value;
> >> +            else
> >> +                    input += UINT_MAX - accum->prev_value +
> >> +                                    accum->energy_ctr;
> >> +    }
> >>
> >>      /* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */
> >>      *val = div64_ul(input * 1000000UL, BIT(data->energy_units)); @@
> >> -264,9 +270,67 @@ static int amd_create_sensor(struct device *dev,
> >>      return 0;
> >>  }
> >>
> >> +static ssize_t amd_energy_accumulate_show(struct device *dev,
> >> +                                      struct device_attribute *dev_attr,
> >> +                                      char *buf) {
> >> +    struct amd_energy_data *data = dev_get_drvdata(dev);
> >> +
> >> +    return sprintf(buf, "%d\n", data->accumulator_status); }
> >> +
> >> +static ssize_t amd_energy_accumulate_store(struct device *dev,
> >> +                                       struct device_attribute *dev_attr,
> >> +                                       const char *buf, size_t
> >> +count) {
> >> +    struct amd_energy_data *data = dev_get_drvdata(dev);
> >> +    bool input;
> >> +    int ret;
> >> +
> >> +    ret = kstrtobool(buf, &input);
> >> +    if (ret)
> >> +            return ret;
> >> +
> >> +    if (data->accumulator_status == input)
> >> +            return count;
> >> +
> >> +    if (input) {
> >> +            memset(data->accums, 0, (data->nr_cpus + data->nr_socks) *
> >> +                    sizeof(struct sensor_accumulator));
> >> +
> >> +            if (!data->wrap_accumulate) {
> >> +                    data->wrap_accumulate =
> >> +                            kthread_run(energy_accumulator,
> >> +                                        data, "%s", dev_name(dev));
> >> +                    if (IS_ERR(data->wrap_accumulate))
> >> +                            return PTR_ERR(data->wrap_accumulate);
> >> +            }
> >> +    } else {
> >> +            if (data && data->wrap_accumulate) {
> >> +                    ret = kthread_stop(data->wrap_accumulate);
> >> +                    if (ret)
> >> +                            return ret;
> >> +                    data->wrap_accumulate = NULL;
> >> +            }
> >> +    }
> >> +    data->accumulator_status = input;
> >> +
> >> +    return count;
> >> +}
> >> +
> >> +static int create_accumulate_status_file(struct amd_energy_data
> >> +*data) {
> >> +    accumulate_attr.attr.name = "accumulator_status";
> >> +    accumulate_attr.attr.mode = 0664;
> >> +    accumulate_attr.show = amd_energy_accumulate_show;
> >> +    accumulate_attr.store = amd_energy_accumulate_store;
> >> +
> >> +    return sysfs_create_file(&data->hwmon_dev->kobj,
> >> +                             &accumulate_attr.attr); }
> >> +
> >>  static int amd_energy_probe(struct platform_device *pdev)  {
> >> -    struct device *hwmon_dev;
> >>      struct amd_energy_data *data;
> >>      struct device *dev = &pdev->dev;
> >>      int ret;
> >> @@ -290,12 +354,12 @@ static int amd_energy_probe(struct platform_device *pdev)
> >>      mutex_init(&data->lock);
> >>      get_energy_units(data);
> >>
> >> -    hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> >> -                                                     data,
> >> -                                                     &data->chip,
> >> -                                                     NULL);
> >> -    if (IS_ERR(hwmon_dev))
> >> -            return PTR_ERR(hwmon_dev);
> >> +    data->hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> >> +                                                           data,
> >> +                                                           &data->chip,
> >> +                                                           NULL);
> >> +    if (IS_ERR(data->hwmon_dev))
> >> +            return PTR_ERR(data->hwmon_dev);
> >>
> >>      /* Once in 3 minutes for a resolution of 1/2*16 */
> >>      if (data->energy_units == 0x10)
> >> @@ -305,10 +369,12 @@ static int amd_energy_probe(struct platform_device *pdev)
> >>      if (data->energy_units == 0x6)
> >>              data->timeout = 3 * 24 * 60 * 60;
> >>
> >> -    data->wrap_accumulate = kthread_run(energy_accumulator, data,
> >> -                                        "%s", dev_name(hwmon_dev));
> >> -    if (IS_ERR(data->wrap_accumulate))
> >> -            return PTR_ERR(data->wrap_accumulate);
> >> +    /* Disabling the energy accumulation by default */
> >> +    data->accumulator_status = 0;
> >> +
> >> +    ret = create_accumulate_status_file(data);
> >> +    if (ret)
> >> +            return ret;
> >>
> >>      return 0;
> >>  }
> >> @@ -317,6 +383,8 @@ static int amd_energy_remove(struct
> >> platform_device *pdev)  {
> >>      struct amd_energy_data *data = dev_get_drvdata(&pdev->dev);
> >>
> >> +    sysfs_remove_file(&data->hwmon_dev->kobj,
> >> + &accumulate_attr.attr);
> >> +
> >>      if (data && data->wrap_accumulate)
> >>              kthread_stop(data->wrap_accumulate);
> >>
> >>
> >
Guenter Roeck Sept. 8, 2020, 4:36 p.m. UTC | #4
On Tue, Sep 08, 2020 at 09:51:23PM +0530, Naveen Krishna Ch wrote:
> Hi Guenter
> 
> On Tue, 8 Sep 2020 at 21:42, Chatradhi, Naveen Krishna
> <NaveenKrishna.Chatradhi@amd.com> wrote:
> >
> > [AMD Public Use]
> >
> > [CAUTION: External Email]
> >
> > On 9/5/20 8:17 AM, Guenter Roeck wrote:
> > > On 9/5/20 7:32 AM, Naveen Krishna Chatradhi wrote:
> > >> Provide an option "accumulator_status" via sysfs to enable/disable
> > >> the software accumulation of energy counters.
> > >>
> > >> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > >
> > > I am quite substantially opposed to this. I'd be willing to accept a
> > > module parameter. However, the code is there, and it is enabled by
> > > default, and it should stay enabled by default.
> Sure, I will change it back.
> 
> > > I don't want to have to deal with people complaining that it suddenly
> > > no longer works.
> Understood.
> > >
> > > Also, there needs to be an explanation for why this is needed.
> > >
> >
> > No, wait, without accumulator this driver has zero value.
> > Users should just not load the driver if they don't want the overhead associated with accumulation.
> 
> The use case we have is to provide an interface to enable/disable
> accumulation by the admins, without having to reboot or install
> something.
> A userspace API is provided which will be used by the applications.
> 

Really, that should not be done. If accumulation is not wanted, the
driver is pointless and should be unloaded (or not be loaded to start
with).

Guenter
diff mbox series

Patch

diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
index 96c61784d05c..c294bea56c02 100644
--- a/drivers/hwmon/amd_energy.c
+++ b/drivers/hwmon/amd_energy.c
@@ -32,6 +32,8 @@ 
 #define AMD_ENERGY_UNIT_MASK	0x01F00
 #define AMD_ENERGY_MASK		0xFFFFFFFF
 
+static struct device_attribute accumulate_attr;
+
 struct sensor_accumulator {
 	u64 energy_ctr;
 	u64 prev_value;
@@ -42,10 +44,12 @@  struct amd_energy_data {
 	const struct hwmon_channel_info *info[2];
 	struct hwmon_chip_info chip;
 	struct task_struct *wrap_accumulate;
+	struct device *hwmon_dev;
 	/* Lock around the accumulator */
 	struct mutex lock;
 	/* An accumulator for each core and socket */
 	struct sensor_accumulator *accums;
+	bool accumulator_status;
 	/* Energy Status Units */
 	u64 energy_units;
 	unsigned int timeout;
@@ -128,13 +132,15 @@  static void amd_add_delta(struct amd_energy_data *data, int ch,
 	rdmsrl_safe_on_cpu(cpu, reg, &input);
 	input &= AMD_ENERGY_MASK;
 
-	accum = &data->accums[ch];
-	if (input >= accum->prev_value)
-		input += accum->energy_ctr -
-				accum->prev_value;
-	else
-		input += UINT_MAX - accum->prev_value +
-				accum->energy_ctr;
+	if (data->accumulator_status) {
+		accum = &data->accums[ch];
+		if (input >= accum->prev_value)
+			input += accum->energy_ctr -
+					accum->prev_value;
+		else
+			input += UINT_MAX - accum->prev_value +
+					accum->energy_ctr;
+	}
 
 	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */
 	*val = div64_ul(input * 1000000UL, BIT(data->energy_units));
@@ -264,9 +270,67 @@  static int amd_create_sensor(struct device *dev,
 	return 0;
 }
 
+static ssize_t amd_energy_accumulate_show(struct device *dev,
+					  struct device_attribute *dev_attr,
+					  char *buf)
+{
+	struct amd_energy_data *data = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", data->accumulator_status);
+}
+
+static ssize_t amd_energy_accumulate_store(struct device *dev,
+					   struct device_attribute *dev_attr,
+					   const char *buf, size_t count)
+{
+	struct amd_energy_data *data = dev_get_drvdata(dev);
+	bool input;
+	int ret;
+
+	ret = kstrtobool(buf, &input);
+	if (ret)
+		return ret;
+
+	if (data->accumulator_status == input)
+		return count;
+
+	if (input) {
+		memset(data->accums, 0, (data->nr_cpus + data->nr_socks) *
+			sizeof(struct sensor_accumulator));
+
+		if (!data->wrap_accumulate) {
+			data->wrap_accumulate =
+				kthread_run(energy_accumulator,
+					    data, "%s", dev_name(dev));
+			if (IS_ERR(data->wrap_accumulate))
+				return PTR_ERR(data->wrap_accumulate);
+		}
+	} else {
+		if (data && data->wrap_accumulate) {
+			ret = kthread_stop(data->wrap_accumulate);
+			if (ret)
+				return ret;
+			data->wrap_accumulate = NULL;
+		}
+	}
+	data->accumulator_status = input;
+
+	return count;
+}
+
+static int create_accumulate_status_file(struct amd_energy_data *data)
+{
+	accumulate_attr.attr.name = "accumulator_status";
+	accumulate_attr.attr.mode = 0664;
+	accumulate_attr.show = amd_energy_accumulate_show;
+	accumulate_attr.store = amd_energy_accumulate_store;
+
+	return sysfs_create_file(&data->hwmon_dev->kobj,
+				 &accumulate_attr.attr);
+}
+
 static int amd_energy_probe(struct platform_device *pdev)
 {
-	struct device *hwmon_dev;
 	struct amd_energy_data *data;
 	struct device *dev = &pdev->dev;
 	int ret;
@@ -290,12 +354,12 @@  static int amd_energy_probe(struct platform_device *pdev)
 	mutex_init(&data->lock);
 	get_energy_units(data);
 
-	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
-							 data,
-							 &data->chip,
-							 NULL);
-	if (IS_ERR(hwmon_dev))
-		return PTR_ERR(hwmon_dev);
+	data->hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
+							       data,
+							       &data->chip,
+							       NULL);
+	if (IS_ERR(data->hwmon_dev))
+		return PTR_ERR(data->hwmon_dev);
 
 	/* Once in 3 minutes for a resolution of 1/2*16 */
 	if (data->energy_units == 0x10)
@@ -305,10 +369,12 @@  static int amd_energy_probe(struct platform_device *pdev)
 	if (data->energy_units == 0x6)
 		data->timeout = 3 * 24 * 60 * 60;
 
-	data->wrap_accumulate = kthread_run(energy_accumulator, data,
-					    "%s", dev_name(hwmon_dev));
-	if (IS_ERR(data->wrap_accumulate))
-		return PTR_ERR(data->wrap_accumulate);
+	/* Disabling the energy accumulation by default */
+	data->accumulator_status = 0;
+
+	ret = create_accumulate_status_file(data);
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -317,6 +383,8 @@  static int amd_energy_remove(struct platform_device *pdev)
 {
 	struct amd_energy_data *data = dev_get_drvdata(&pdev->dev);
 
+	sysfs_remove_file(&data->hwmon_dev->kobj, &accumulate_attr.attr);
+
 	if (data && data->wrap_accumulate)
 		kthread_stop(data->wrap_accumulate);