diff mbox series

[v3,10/14,RESEND] xen: Add SET_CPUFREQ_HWP xen_sysctl_pm_op

Message ID 20230501193034.88575-11-jandryuk@gmail.com (mailing list archive)
State Superseded
Headers show
Series Intel Hardware P-States (HWP) support | expand

Commit Message

Jason Andryuk May 1, 2023, 7:30 p.m. UTC
Add SET_CPUFREQ_HWP xen_sysctl_pm_op to set HWP parameters.  The sysctl
supports setting multiple values simultaneously as indicated by the
set_params bits.  This allows atomically applying new HWP configuration
via a single wrmsr.

XEN_SYSCTL_HWP_SET_PRESET_BALANCE/PERFORMANCE/POWERSAVE provide three
common presets.  Setting them depends on hardware limits which the
hypervisor is already caching.  So using them allows skipping a
hypercall to query the limits (lowest/highest) to then set those same
values.  The code is organized to allow a preset to be refined with
additional stuff if desired.

"most_efficient" and "guaranteed" could be additional presets in the
future, but the are not added now.  Those levels can change at runtime,
but we don't have code in place to monitor and update for those events.

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>

---
v3:
Remove cpufreq_governor_internal from set_cpufreq_hwp

v2:
Update for naming anonymous union
Drop hwp_err for invalid input in set_hwp_para()
Drop uint16_t cast in XEN_SYSCTL_HWP_SET_PARAM_MASK
Drop parens for HWP_SET_PRESET defines
Reference activity_window format comment
Place SET_CPUFREQ_HWP after SET_CPUFREQ_PARA
Add {HWP,IA32}_ENERGY_PERF_MAX_{PERFORMANCE,POWERSAVE} defines
Order defines before fields in sysctl.h
Use XEN_HWP_GOVERNOR
Use per_cpu for hwp_drv_data
---
 xen/arch/x86/acpi/cpufreq/hwp.c    | 96 ++++++++++++++++++++++++++++++
 xen/drivers/acpi/pmstat.c          | 18 ++++++
 xen/include/acpi/cpufreq/cpufreq.h |  2 +
 xen/include/public/sysctl.h        | 30 ++++++++++
 4 files changed, 146 insertions(+)

Comments

Jan Beulich May 8, 2023, 11:27 a.m. UTC | #1
On 01.05.2023 21:30, Jason Andryuk wrote:
> @@ -531,6 +533,100 @@ int get_hwp_para(const struct cpufreq_policy *policy,
>      return 0;
>  }
>  
> +int set_hwp_para(struct cpufreq_policy *policy,
> +                 struct xen_set_hwp_para *set_hwp)

const?

> +{
> +    unsigned int cpu = policy->cpu;
> +    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> +
> +    if ( data == NULL )
> +        return -EINVAL;
> +
> +    /* Validate all parameters first */
> +    if ( set_hwp->set_params & ~XEN_SYSCTL_HWP_SET_PARAM_MASK )
> +        return -EINVAL;
> +
> +    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
> +        return -EINVAL;

Below you limit checks to when the respective control bit is set. I
think you want the same here.

> +    if ( !feature_hwp_energy_perf &&
> +         (set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF) &&
> +         set_hwp->energy_perf > IA32_ENERGY_BIAS_MAX_POWERSAVE )
> +        return -EINVAL;
> +
> +    if ( (set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED) &&
> +         set_hwp->desired != 0 &&
> +         (set_hwp->desired < data->hw.lowest ||
> +          set_hwp->desired > data->hw.highest) )
> +        return -EINVAL;
> +
> +    /*
> +     * minimum & maximum are not validated as hardware doesn't seem to care
> +     * and the SDM says CPUs will clip internally.
> +     */
> +
> +    /* Apply presets */
> +    switch ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK )
> +    {
> +    case XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE:
> +        data->minimum = data->hw.lowest;
> +        data->maximum = data->hw.lowest;
> +        data->activity_window = 0;
> +        if ( feature_hwp_energy_perf )
> +            data->energy_perf = HWP_ENERGY_PERF_MAX_POWERSAVE;
> +        else
> +            data->energy_perf = IA32_ENERGY_BIAS_MAX_POWERSAVE;
> +        data->desired = 0;
> +        break;
> +
> +    case XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE:
> +        data->minimum = data->hw.highest;
> +        data->maximum = data->hw.highest;
> +        data->activity_window = 0;
> +        data->energy_perf = HWP_ENERGY_PERF_MAX_PERFORMANCE;
> +        data->desired = 0;
> +        break;
> +
> +    case XEN_SYSCTL_HWP_SET_PRESET_BALANCE:
> +        data->minimum = data->hw.lowest;
> +        data->maximum = data->hw.highest;
> +        data->activity_window = 0;
> +        if ( feature_hwp_energy_perf )
> +            data->energy_perf = HWP_ENERGY_PERF_BALANCE;
> +        else
> +            data->energy_perf = IA32_ENERGY_BIAS_BALANCE;
> +        data->desired = 0;
> +        break;
> +
> +    case XEN_SYSCTL_HWP_SET_PRESET_NONE:
> +        break;
> +
> +    default:
> +        return -EINVAL;
> +    }

So presets set all the values for which the individual item control bits
are clear. That's not exactly what I would have expected, and it took me
reading the code several times until I realized that you write life per-
CPU data fields here, not fields of some intermediate variable. I think
this could do with saying explicitly in the public header (if indeed the
intended model).

> +    /* Further customize presets if needed */
> +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MINIMUM )
> +        data->minimum = set_hwp->minimum;
> +
> +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MAXIMUM )
> +        data->maximum = set_hwp->maximum;
> +
> +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF )
> +        data->energy_perf = set_hwp->energy_perf;
> +
> +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED )
> +        data->desired = set_hwp->desired;
> +
> +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ACT_WINDOW )
> +        data->activity_window = set_hwp->activity_window &
> +                                XEN_SYSCTL_HWP_ACT_WINDOW_MASK;
> +
> +    hwp_cpufreq_target(policy, 0, 0);
> +
> +    return 0;

I don't think you should assume here that hwp_cpufreq_target() will
only ever return 0. Plus by returning its return value here you
allow the compiler to tail-call optimize this code.

> --- a/xen/drivers/acpi/pmstat.c
> +++ b/xen/drivers/acpi/pmstat.c
> @@ -398,6 +398,20 @@ static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
>      return ret;
>  }
>  
> +static int set_cpufreq_hwp(struct xen_sysctl_pm_op *op)

const?

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -317,6 +317,34 @@ struct xen_hwp_para {
>      uint8_t energy_perf;
>  };
>  
> +/* set multiple values simultaneously when set_args bit is set */

What "set_args bit" does this comment refer to?

> +struct xen_set_hwp_para {
> +#define XEN_SYSCTL_HWP_SET_DESIRED              (1U << 0)
> +#define XEN_SYSCTL_HWP_SET_ENERGY_PERF          (1U << 1)
> +#define XEN_SYSCTL_HWP_SET_ACT_WINDOW           (1U << 2)
> +#define XEN_SYSCTL_HWP_SET_MINIMUM              (1U << 3)
> +#define XEN_SYSCTL_HWP_SET_MAXIMUM              (1U << 4)
> +#define XEN_SYSCTL_HWP_SET_PRESET_MASK          0xf000
> +#define XEN_SYSCTL_HWP_SET_PRESET_NONE          0x0000
> +#define XEN_SYSCTL_HWP_SET_PRESET_BALANCE       0x1000
> +#define XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE     0x2000
> +#define XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE   0x3000
> +#define XEN_SYSCTL_HWP_SET_PARAM_MASK ( \
> +                                  XEN_SYSCTL_HWP_SET_PRESET_MASK | \
> +                                  XEN_SYSCTL_HWP_SET_DESIRED     | \
> +                                  XEN_SYSCTL_HWP_SET_ENERGY_PERF | \
> +                                  XEN_SYSCTL_HWP_SET_ACT_WINDOW  | \
> +                                  XEN_SYSCTL_HWP_SET_MINIMUM     | \
> +                                  XEN_SYSCTL_HWP_SET_MAXIMUM     )
> +    uint16_t set_params; /* bitflags for valid values */
> +#define XEN_SYSCTL_HWP_ACT_WINDOW_MASK          0x03ff
> +    uint16_t activity_window; /* See comment in struct xen_hwp_para */
> +    uint8_t minimum;
> +    uint8_t maximum;
> +    uint8_t desired;
> +    uint8_t energy_perf; /* 0-255 or 0-15 depending on HW support */

Instead of (or in addition to) the "HW support" reference, could this
gain a reference to the "get para" bit determining which range to use?

Jan
Jason Andryuk May 22, 2023, 12:45 p.m. UTC | #2
On Mon, May 8, 2023 at 7:27 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 01.05.2023 21:30, Jason Andryuk wrote:
> > @@ -531,6 +533,100 @@ int get_hwp_para(const struct cpufreq_policy *policy,
> >      return 0;
> >  }
> >
> > +int set_hwp_para(struct cpufreq_policy *policy,
> > +                 struct xen_set_hwp_para *set_hwp)
>
> const?

set_hwp can be const.  policy is passed to hwp_cpufreq_target() &
on_selected_cpus(), so it cannot readily be made const.

> > +{
> > +    unsigned int cpu = policy->cpu;
> > +    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> > +
> > +    if ( data == NULL )
> > +        return -EINVAL;
> > +
> > +    /* Validate all parameters first */
> > +    if ( set_hwp->set_params & ~XEN_SYSCTL_HWP_SET_PARAM_MASK )
> > +        return -EINVAL;
> > +
> > +    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
> > +        return -EINVAL;
>
> Below you limit checks to when the respective control bit is set. I
> think you want the same here.

Not sure if you mean feature_hwp_activity_window or the bit in
set_params as control bit.  But, yes, they can both use some
additional checking.  IIRC, I wanted to always check
~XEN_SYSCTL_HWP_ACT_WINDOW_MASK, because bits should never be set
whether or not the activity window is supported by hardware.

> > +    if ( !feature_hwp_energy_perf &&
> > +         (set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF) &&
> > +         set_hwp->energy_perf > IA32_ENERGY_BIAS_MAX_POWERSAVE )
> > +        return -EINVAL;
> > +
> > +    if ( (set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED) &&
> > +         set_hwp->desired != 0 &&
> > +         (set_hwp->desired < data->hw.lowest ||
> > +          set_hwp->desired > data->hw.highest) )
> > +        return -EINVAL;
> > +
> > +    /*
> > +     * minimum & maximum are not validated as hardware doesn't seem to care
> > +     * and the SDM says CPUs will clip internally.
> > +     */
> > +
> > +    /* Apply presets */
> > +    switch ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK )
> > +    {
> > +    case XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE:
> > +        data->minimum = data->hw.lowest;
> > +        data->maximum = data->hw.lowest;
> > +        data->activity_window = 0;
> > +        if ( feature_hwp_energy_perf )
> > +            data->energy_perf = HWP_ENERGY_PERF_MAX_POWERSAVE;
> > +        else
> > +            data->energy_perf = IA32_ENERGY_BIAS_MAX_POWERSAVE;
> > +        data->desired = 0;
> > +        break;
> > +
> > +    case XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE:
> > +        data->minimum = data->hw.highest;
> > +        data->maximum = data->hw.highest;
> > +        data->activity_window = 0;
> > +        data->energy_perf = HWP_ENERGY_PERF_MAX_PERFORMANCE;
> > +        data->desired = 0;
> > +        break;
> > +
> > +    case XEN_SYSCTL_HWP_SET_PRESET_BALANCE:
> > +        data->minimum = data->hw.lowest;
> > +        data->maximum = data->hw.highest;
> > +        data->activity_window = 0;
> > +        if ( feature_hwp_energy_perf )
> > +            data->energy_perf = HWP_ENERGY_PERF_BALANCE;
> > +        else
> > +            data->energy_perf = IA32_ENERGY_BIAS_BALANCE;
> > +        data->desired = 0;
> > +        break;
> > +
> > +    case XEN_SYSCTL_HWP_SET_PRESET_NONE:
> > +        break;
> > +
> > +    default:
> > +        return -EINVAL;
> > +    }
>
> So presets set all the values for which the individual item control bits
> are clear. That's not exactly what I would have expected, and it took me
> reading the code several times until I realized that you write life per-
> CPU data fields here, not fields of some intermediate variable. I think
> this could do with saying explicitly in the public header (if indeed the
> intended model).

The commit message mentioned the idea of using a preset and further
refinement.  The comments above "/* Apply presets */" and below "/*
Further customize presets if needed */" were an attempt to highlight
that.  But you are right that the public header should state this
clearly.

> > +    /* Further customize presets if needed */
> > +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MINIMUM )
> > +        data->minimum = set_hwp->minimum;
> > +
> > +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MAXIMUM )
> > +        data->maximum = set_hwp->maximum;
> > +
> > +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF )
> > +        data->energy_perf = set_hwp->energy_perf;
> > +
> > +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED )
> > +        data->desired = set_hwp->desired;
> > +
> > +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ACT_WINDOW )
> > +        data->activity_window = set_hwp->activity_window &
> > +                                XEN_SYSCTL_HWP_ACT_WINDOW_MASK;
> > +
> > +    hwp_cpufreq_target(policy, 0, 0);
> > +
> > +    return 0;
>
> I don't think you should assume here that hwp_cpufreq_target() will
> only ever return 0. Plus by returning its return value here you
> allow the compiler to tail-call optimize this code.

Thanks for catching that.  Yeah, I made hwp_cpufreq_target() return a
value per your earlier comment, so its value should be returned now.

> > --- a/xen/drivers/acpi/pmstat.c
> > +++ b/xen/drivers/acpi/pmstat.c
> > @@ -398,6 +398,20 @@ static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
> >      return ret;
> >  }
> >
> > +static int set_cpufreq_hwp(struct xen_sysctl_pm_op *op)
>
> const?

Yes

> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -317,6 +317,34 @@ struct xen_hwp_para {
> >      uint8_t energy_perf;
> >  };
> >
> > +/* set multiple values simultaneously when set_args bit is set */
>
> What "set_args bit" does this comment refer to?

That should be set_params. IIRC, set_args was the previous name.

> > +struct xen_set_hwp_para {
> > +#define XEN_SYSCTL_HWP_SET_DESIRED              (1U << 0)
> > +#define XEN_SYSCTL_HWP_SET_ENERGY_PERF          (1U << 1)
> > +#define XEN_SYSCTL_HWP_SET_ACT_WINDOW           (1U << 2)
> > +#define XEN_SYSCTL_HWP_SET_MINIMUM              (1U << 3)
> > +#define XEN_SYSCTL_HWP_SET_MAXIMUM              (1U << 4)
> > +#define XEN_SYSCTL_HWP_SET_PRESET_MASK          0xf000
> > +#define XEN_SYSCTL_HWP_SET_PRESET_NONE          0x0000
> > +#define XEN_SYSCTL_HWP_SET_PRESET_BALANCE       0x1000
> > +#define XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE     0x2000
> > +#define XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE   0x3000
> > +#define XEN_SYSCTL_HWP_SET_PARAM_MASK ( \
> > +                                  XEN_SYSCTL_HWP_SET_PRESET_MASK | \
> > +                                  XEN_SYSCTL_HWP_SET_DESIRED     | \
> > +                                  XEN_SYSCTL_HWP_SET_ENERGY_PERF | \
> > +                                  XEN_SYSCTL_HWP_SET_ACT_WINDOW  | \
> > +                                  XEN_SYSCTL_HWP_SET_MINIMUM     | \
> > +                                  XEN_SYSCTL_HWP_SET_MAXIMUM     )
> > +    uint16_t set_params; /* bitflags for valid values */
> > +#define XEN_SYSCTL_HWP_ACT_WINDOW_MASK          0x03ff
> > +    uint16_t activity_window; /* See comment in struct xen_hwp_para */
> > +    uint8_t minimum;
> > +    uint8_t maximum;
> > +    uint8_t desired;
> > +    uint8_t energy_perf; /* 0-255 or 0-15 depending on HW support */
>
> Instead of (or in addition to) the "HW support" reference, could this
> gain a reference to the "get para" bit determining which range to use?

I've removed the fallback 0-15 support locally, so this will be removed.

Regards,
Jason
Jan Beulich May 22, 2023, 1:10 p.m. UTC | #3
On 22.05.2023 14:45, Jason Andryuk wrote:
> On Mon, May 8, 2023 at 7:27 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 01.05.2023 21:30, Jason Andryuk wrote:
>>> @@ -531,6 +533,100 @@ int get_hwp_para(const struct cpufreq_policy *policy,
>>>      return 0;
>>>  }
>>>
>>> +int set_hwp_para(struct cpufreq_policy *policy,
>>> +                 struct xen_set_hwp_para *set_hwp)
>>
>> const?
> 
> set_hwp can be const.  policy is passed to hwp_cpufreq_target() &
> on_selected_cpus(), so it cannot readily be made const.

I was only meaning the 2nd parameter, yes.

>>> +{
>>> +    unsigned int cpu = policy->cpu;
>>> +    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
>>> +
>>> +    if ( data == NULL )
>>> +        return -EINVAL;
>>> +
>>> +    /* Validate all parameters first */
>>> +    if ( set_hwp->set_params & ~XEN_SYSCTL_HWP_SET_PARAM_MASK )
>>> +        return -EINVAL;
>>> +
>>> +    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
>>> +        return -EINVAL;
>>
>> Below you limit checks to when the respective control bit is set. I
>> think you want the same here.
> 
> Not sure if you mean feature_hwp_activity_window or the bit in
> set_params as control bit.  But, yes, they can both use some
> additional checking.  IIRC, I wanted to always check
> ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK, because bits should never be set
> whether or not the activity window is supported by hardware.

I took ...

>>> +    if ( !feature_hwp_energy_perf &&
>>> +         (set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF) &&
>>> +         set_hwp->energy_perf > IA32_ENERGY_BIAS_MAX_POWERSAVE )
>>> +        return -EINVAL;
>>> +
>>> +    if ( (set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED) &&
>>> +         set_hwp->desired != 0 &&
>>> +         (set_hwp->desired < data->hw.lowest ||
>>> +          set_hwp->desired > data->hw.highest) )
>>> +        return -EINVAL;

... e.g. this for comparison, where you apply the range check only when
the XEN_SYSCTL_HWP_* bit is set. I think you want to be consistent in
such checking: Either you always allow the caller to not care about
fields that aren't going to be consumed when their controlling bit is
off, or you always check validity. Both approaches have their pros and
cons, I think.

Jan
Jason Andryuk May 22, 2023, 2:43 p.m. UTC | #4
On Mon, May 22, 2023 at 9:11 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 22.05.2023 14:45, Jason Andryuk wrote:
> > On Mon, May 8, 2023 at 7:27 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 01.05.2023 21:30, Jason Andryuk wrote:
> >>> +    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
> >>> +        return -EINVAL;
> >>
> >> Below you limit checks to when the respective control bit is set. I
> >> think you want the same here.
> >
> > Not sure if you mean feature_hwp_activity_window or the bit in
> > set_params as control bit.  But, yes, they can both use some
> > additional checking.  IIRC, I wanted to always check
> > ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK, because bits should never be set
> > whether or not the activity window is supported by hardware.
>
> I took ...
>
> >>> +    if ( !feature_hwp_energy_perf &&
> >>> +         (set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF) &&
> >>> +         set_hwp->energy_perf > IA32_ENERGY_BIAS_MAX_POWERSAVE )
> >>> +        return -EINVAL;
> >>> +
> >>> +    if ( (set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED) &&
> >>> +         set_hwp->desired != 0 &&
> >>> +         (set_hwp->desired < data->hw.lowest ||
> >>> +          set_hwp->desired > data->hw.highest) )
> >>> +        return -EINVAL;
>
> ... e.g. this for comparison, where you apply the range check only when
> the XEN_SYSCTL_HWP_* bit is set. I think you want to be consistent in
> such checking: Either you always allow the caller to not care about
> fields that aren't going to be consumed when their controlling bit is
> off, or you always check validity. Both approaches have their pros and
> cons, I think.

Ok, good point.  I wrote it inconsistently because the SDM states the
desired limit: "When set to a non-zero value (between the range of
Lowest_Performance and Highest_Performance of IA32_HWP_CAPABILITIES)
conveys an explicit performance request hint to the hardware;
effectively disabling HW Autonomous selection."  And I was trying to
follow that.  But later "The HWP hardware clips and resolves the field
values as necessary to the valid range." seems to override that.  I'll
test to verify that it is correct, and drop the lowest/highest
checking if so.

Thanks,
Jason
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index cb52918799..3d15875dc1 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -27,7 +27,9 @@  static bool feature_hdc;
 __initdata bool opt_cpufreq_hwp = false;
 __initdata bool opt_cpufreq_hdc = true;
 
+#define HWP_ENERGY_PERF_MAX_PERFORMANCE 0
 #define HWP_ENERGY_PERF_BALANCE         0x80
+#define HWP_ENERGY_PERF_MAX_POWERSAVE   0xff
 #define IA32_ENERGY_BIAS_BALANCE        0x7
 #define IA32_ENERGY_BIAS_MAX_POWERSAVE  0xf
 #define IA32_ENERGY_BIAS_MASK           0xf
@@ -531,6 +533,100 @@  int get_hwp_para(const struct cpufreq_policy *policy,
     return 0;
 }
 
+int set_hwp_para(struct cpufreq_policy *policy,
+                 struct xen_set_hwp_para *set_hwp)
+{
+    unsigned int cpu = policy->cpu;
+    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+
+    if ( data == NULL )
+        return -EINVAL;
+
+    /* Validate all parameters first */
+    if ( set_hwp->set_params & ~XEN_SYSCTL_HWP_SET_PARAM_MASK )
+        return -EINVAL;
+
+    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
+        return -EINVAL;
+
+    if ( !feature_hwp_energy_perf &&
+         (set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF) &&
+         set_hwp->energy_perf > IA32_ENERGY_BIAS_MAX_POWERSAVE )
+        return -EINVAL;
+
+    if ( (set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED) &&
+         set_hwp->desired != 0 &&
+         (set_hwp->desired < data->hw.lowest ||
+          set_hwp->desired > data->hw.highest) )
+        return -EINVAL;
+
+    /*
+     * minimum & maximum are not validated as hardware doesn't seem to care
+     * and the SDM says CPUs will clip internally.
+     */
+
+    /* Apply presets */
+    switch ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK )
+    {
+    case XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE:
+        data->minimum = data->hw.lowest;
+        data->maximum = data->hw.lowest;
+        data->activity_window = 0;
+        if ( feature_hwp_energy_perf )
+            data->energy_perf = HWP_ENERGY_PERF_MAX_POWERSAVE;
+        else
+            data->energy_perf = IA32_ENERGY_BIAS_MAX_POWERSAVE;
+        data->desired = 0;
+        break;
+
+    case XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE:
+        data->minimum = data->hw.highest;
+        data->maximum = data->hw.highest;
+        data->activity_window = 0;
+        data->energy_perf = HWP_ENERGY_PERF_MAX_PERFORMANCE;
+        data->desired = 0;
+        break;
+
+    case XEN_SYSCTL_HWP_SET_PRESET_BALANCE:
+        data->minimum = data->hw.lowest;
+        data->maximum = data->hw.highest;
+        data->activity_window = 0;
+        if ( feature_hwp_energy_perf )
+            data->energy_perf = HWP_ENERGY_PERF_BALANCE;
+        else
+            data->energy_perf = IA32_ENERGY_BIAS_BALANCE;
+        data->desired = 0;
+        break;
+
+    case XEN_SYSCTL_HWP_SET_PRESET_NONE:
+        break;
+
+    default:
+        return -EINVAL;
+    }
+
+    /* Further customize presets if needed */
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MINIMUM )
+        data->minimum = set_hwp->minimum;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MAXIMUM )
+        data->maximum = set_hwp->maximum;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF )
+        data->energy_perf = set_hwp->energy_perf;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED )
+        data->desired = set_hwp->desired;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ACT_WINDOW )
+        data->activity_window = set_hwp->activity_window &
+                                XEN_SYSCTL_HWP_ACT_WINDOW_MASK;
+
+    hwp_cpufreq_target(policy, 0, 0);
+
+    return 0;
+}
+
 int __init hwp_register_driver(void)
 {
     return cpufreq_register_driver(&hwp_cpufreq_driver);
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index 67fd9dabd4..12c76f5e57 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -398,6 +398,20 @@  static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
     return ret;
 }
 
+static int set_cpufreq_hwp(struct xen_sysctl_pm_op *op)
+{
+    struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
+
+    if ( !policy || !policy->governor )
+        return -EINVAL;
+
+    if ( strncasecmp(policy->governor->name, XEN_HWP_GOVERNOR,
+                     CPUFREQ_NAME_LEN) )
+        return -EINVAL;
+
+    return set_hwp_para(policy, &op->u.set_hwp);
+}
+
 int do_pm_op(struct xen_sysctl_pm_op *op)
 {
     int ret = 0;
@@ -470,6 +484,10 @@  int do_pm_op(struct xen_sysctl_pm_op *op)
         break;
     }
 
+    case SET_CPUFREQ_HWP:
+        ret = set_cpufreq_hwp(op);
+        break;
+
     case GET_CPUFREQ_AVGFREQ:
     {
         op->u.get_avgfreq = cpufreq_driver_getavg(op->cpuid, USR_GETAVG);
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 92b4c7e79c..b8831b2cd3 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -249,5 +249,7 @@  extern bool opt_cpufreq_hwp;
 extern bool opt_cpufreq_hdc;
 int get_hwp_para(const struct cpufreq_policy *policy,
                  struct xen_hwp_para *hwp_para);
+int set_hwp_para(struct cpufreq_policy *policy,
+                 struct xen_set_hwp_para *set_hwp);
 
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index bf7e6594a7..3242472cbe 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -317,6 +317,34 @@  struct xen_hwp_para {
     uint8_t energy_perf;
 };
 
+/* set multiple values simultaneously when set_args bit is set */
+struct xen_set_hwp_para {
+#define XEN_SYSCTL_HWP_SET_DESIRED              (1U << 0)
+#define XEN_SYSCTL_HWP_SET_ENERGY_PERF          (1U << 1)
+#define XEN_SYSCTL_HWP_SET_ACT_WINDOW           (1U << 2)
+#define XEN_SYSCTL_HWP_SET_MINIMUM              (1U << 3)
+#define XEN_SYSCTL_HWP_SET_MAXIMUM              (1U << 4)
+#define XEN_SYSCTL_HWP_SET_PRESET_MASK          0xf000
+#define XEN_SYSCTL_HWP_SET_PRESET_NONE          0x0000
+#define XEN_SYSCTL_HWP_SET_PRESET_BALANCE       0x1000
+#define XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE     0x2000
+#define XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE   0x3000
+#define XEN_SYSCTL_HWP_SET_PARAM_MASK ( \
+                                  XEN_SYSCTL_HWP_SET_PRESET_MASK | \
+                                  XEN_SYSCTL_HWP_SET_DESIRED     | \
+                                  XEN_SYSCTL_HWP_SET_ENERGY_PERF | \
+                                  XEN_SYSCTL_HWP_SET_ACT_WINDOW  | \
+                                  XEN_SYSCTL_HWP_SET_MINIMUM     | \
+                                  XEN_SYSCTL_HWP_SET_MAXIMUM     )
+    uint16_t set_params; /* bitflags for valid values */
+#define XEN_SYSCTL_HWP_ACT_WINDOW_MASK          0x03ff
+    uint16_t activity_window; /* See comment in struct xen_hwp_para */
+    uint8_t minimum;
+    uint8_t maximum;
+    uint8_t desired;
+    uint8_t energy_perf; /* 0-255 or 0-15 depending on HW support */
+};
+
 #define XEN_HWP_GOVERNOR "hwp-internal"
 /*
  * cpufreq para name of this structure named
@@ -379,6 +407,7 @@  struct xen_sysctl_pm_op {
     #define SET_CPUFREQ_GOV            (CPUFREQ_PARA | 0x02)
     #define SET_CPUFREQ_PARA           (CPUFREQ_PARA | 0x03)
     #define GET_CPUFREQ_AVGFREQ        (CPUFREQ_PARA | 0x04)
+    #define SET_CPUFREQ_HWP            (CPUFREQ_PARA | 0x05)
 
     /* set/reset scheduler power saving option */
     #define XEN_SYSCTL_pm_op_set_sched_opt_smt    0x21
@@ -405,6 +434,7 @@  struct xen_sysctl_pm_op {
         struct xen_get_cpufreq_para get_para;
         struct xen_set_cpufreq_gov  set_gov;
         struct xen_set_cpufreq_para set_para;
+        struct xen_set_hwp_para     set_hwp;
         uint64_aligned_t get_avgfreq;
         uint32_t                    set_sched_opt_smt;
 #define XEN_SYSCTL_CX_UNLIMITED 0xffffffff