diff mbox series

[v5,12/15] xen: Add SET_CPUFREQ_HWP xen_sysctl_pm_op

Message ID 20230706185440.48333-13-jandryuk@gmail.com (mailing list archive)
State Superseded
Headers show
Series Intel Hardware P-States (HWP) support | expand

Commit Message

Jason Andryuk July 6, 2023, 6:54 p.m. UTC
Add SET_CPUFREQ_HWP xen_sysctl_pm_op to set HWP parameters.  The sysctl
supports setting multiple values simultaneously as indicated by the
set_params bits.  This allows atomically applying new HWP configuration
via a single wrmsr.

XEN_SYSCTL_HWP_SET_PRESET_BALANCE/PERFORMANCE/POWERSAVE provide three
common presets.  Setting them depends on hardware limits which the
hypervisor is already caching.  So using them allows skipping a
hypercall to query the limits (lowest/highest) to then set those same
values.  The code is organized to allow a preset to be refined with
additional parameters if desired.

"most_efficient" and "guaranteed" could be additional presets in the
future, but they are not added now.  Those levels can change at runtime,
but we don't have code in place to monitor and update for those events.

Since activity window may not be supported by all hardware, omit writing
it when not supported, and return that fact to userspace by updating
set_params.

CPPC parameter checking disallows setting reserved bytes and ensures
values are only non-zero when the corresponding set_params bit is set.
There is no range checking (0-255 is allowed) since hardware is
documented to clip internally.

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>

---
v5:
Use cpufreq.h for include
Add () around bit test
Fix Configure typo
Drop duplicated energy_perf comment
Define XEN_SYSCTL_CPPC_ACT_WINDOW_MASK from *_MANTISSA_MASK and *_EXPONENT_MASK
Re-order XEN_SYSCTL_CPPC_SET_* flags to match field and hardware order.
Remove const from set_cppc param to update set_params
Skip Activity Window if not supported by hardware and clear set_params
Make parameter parsing consistent
Add an exit path when there are no parameters to write.
Expand the header file to cover the IN/OUT set_params.
Remove the "desired" lowest/highest checking as hardware clips internally

v4:
Remove IA32_ENERGY_BIAS support
Validate parameters don't exceed 255
Use CPPC/cppc name
set_cppc_para() add const
set_cppc_para() return hwp_cpufreq_target()
Expand sysctl comments

v3:
Remove cpufreq_governor_internal from set_cpufreq_hwp

v2:
Update for naming anonymous union
Drop hwp_err for invalid input in set_hwp_para()
Drop uint16_t cast in XEN_SYSCTL_HWP_SET_PARAM_MASK
Drop parens for HWP_SET_PRESET defines
Reference activity_window format comment
Place SET_CPUFREQ_HWP after SET_CPUFREQ_PARA
Add {HWP,IA32}_ENERGY_PERF_MAX_{PERFORMANCE,POWERSAVE} defines
Order defines before fields in sysctl.h
Use XEN_HWP_GOVERNOR
Use per_cpu for hwp_drv_data
---
 xen/arch/x86/acpi/cpufreq/hwp.c    | 101 +++++++++++++++++++++++++++++
 xen/drivers/acpi/pmstat.c          |  17 +++++
 xen/include/acpi/cpufreq/cpufreq.h |   2 +
 xen/include/public/sysctl.h        |  64 ++++++++++++++++++
 4 files changed, 184 insertions(+)

Comments

Jan Beulich July 13, 2023, 1:02 p.m. UTC | #1
On 06.07.2023 20:54, Jason Andryuk wrote:
> @@ -531,6 +535,103 @@ int get_hwp_para(unsigned int cpu,
>      return 0;
>  }
>  
> +int set_hwp_para(struct cpufreq_policy *policy,
> +                 struct xen_set_cppc_para *set_cppc)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> +    bool cleared_act_window = false;
> +
> +    if ( data == NULL )
> +        return -EINVAL;

I don't think EINVAL is appropriate here. EOPNOTSUPP might be, or ENOENT,
or EIO, or perhaps a few others.

> +    /* Validate all parameters - Disallow reserved bits. */
> +    if ( set_cppc->minimum > 255 ||
> +         set_cppc->maximum > 255 ||
> +         set_cppc->desired > 255 ||
> +         set_cppc->energy_perf > 255 ||
> +         set_cppc->set_params & ~XEN_SYSCTL_CPPC_SET_PARAM_MASK ||
> +         set_cppc->activity_window & ~XEN_SYSCTL_CPPC_ACT_WINDOW_MASK )

Nit: Parentheses again please around the operands of &.

> +        return -EINVAL;
> +
> +    /* Only allow values if params bit is set. */
> +    if ( (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED) &&
> +          set_cppc->desired) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM) &&
> +          set_cppc->minimum) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM) &&
> +          set_cppc->maximum) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF) &&
> +          set_cppc->energy_perf) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW) &&
> +          set_cppc->activity_window) )
> +        return -EINVAL;
> +
> +    /* Clear out activity window if lacking HW supported. */
> +    if ( (set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW) &&
> +         !feature_hwp_activity_window ) {
> +        set_cppc->set_params &= ~XEN_SYSCTL_CPPC_SET_ACT_WINDOW;
> +        cleared_act_window = true;
> +    }
> +
> +    /* Return if there is nothing to do. */
> +    if ( set_cppc->set_params == 0 )
> +        return cleared_act_window ? 0 : -EINVAL;

Is it really necessary to return an error when there's nothing to do?
We have various hypercalls which can degenerate to no-ops under
certain conditions, and which simply return success then.

> --- a/xen/drivers/acpi/pmstat.c
> +++ b/xen/drivers/acpi/pmstat.c
> @@ -400,6 +400,19 @@ static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
>      return ret;
>  }
>  
> +static int set_cpufreq_cppc(struct xen_sysctl_pm_op *op)
> +{
> +    struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
> +
> +    if ( !policy || !policy->governor )
> +        return -EINVAL;
> +
> +    if ( !hwp_active() )
> +        return -EINVAL;

In both cases I again wonder in how far EINVAL is really appropriate.

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -351,6 +351,68 @@ struct xen_cppc_para {
>      uint32_t activity_window;
>  };
>  
> +/*
> + * Set CPPC values.
> + *
> + * Configure the parameters for CPPC.  Set bits in set_params control which
> + * values are applied.  If a bit is not set in set_params, the field must be
> + * zero.
> + *
> + * For HWP specifically, values must be limited to 0-255 or within
> + * XEN_SYSCTL_CPPC_ACT_WINDOW_MASK for activity window.  Set bits outside the
> + * range will be returned as -EINVAL.
> + *
> + * Activity Window may not be supported by the hardware.  In that case, the
> + * returned set_params will clear XEN_SYSCTL_CPPC_SET_ACT_WINDOW to indicate
> + * that it was not applied - though the rest of the values will be applied.
> + *
> + * There are a set of presets along with individual fields.  Presets are
> + * applied first, and then individual fields.  This allows customizing
> + * a preset without having to specify every value.
> + *
> + * The preset options values are as follows:
> + *
> + * preset      | minimum | maxium  | energy_perf
> + * ------------+---------+---------+----------------
> + * powersave   | lowest  | lowest  | powersave (255)
> + * ------------+---------+---------+----------------
> + * balance     | lowest  | highest | balance (128)
> + * ------------+---------+---------+----------------
> + * performance | highest | highest | performance (0)
> + *
> + * desired and activity_window are set to 0, hardware selected.
> + */
> +struct xen_set_cppc_para {
> +#define XEN_SYSCTL_CPPC_SET_MINIMUM              (1U << 0)
> +#define XEN_SYSCTL_CPPC_SET_MAXIMUM              (1U << 1)
> +#define XEN_SYSCTL_CPPC_SET_DESIRED              (1U << 2)
> +#define XEN_SYSCTL_CPPC_SET_ENERGY_PERF          (1U << 3)
> +#define XEN_SYSCTL_CPPC_SET_ACT_WINDOW           (1U << 4)
> +#define XEN_SYSCTL_CPPC_SET_PRESET_MASK          0xf0000000
> +#define XEN_SYSCTL_CPPC_SET_PRESET_NONE          0x00000000
> +#define XEN_SYSCTL_CPPC_SET_PRESET_BALANCE       0x10000000
> +#define XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE     0x20000000
> +#define XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE   0x30000000

As corrections for the respective Misra rule are in the process of
being merged, please add U suffixes here (at the very least on the
_MASK).

Jan
Jason Andryuk July 13, 2023, 4:12 p.m. UTC | #2
On Thu, Jul 13, 2023 at 9:02 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 06.07.2023 20:54, Jason Andryuk wrote:
> > @@ -531,6 +535,103 @@ int get_hwp_para(unsigned int cpu,
> >      return 0;
> >  }
> >
> > +int set_hwp_para(struct cpufreq_policy *policy,
> > +                 struct xen_set_cppc_para *set_cppc)
> > +{
> > +    unsigned int cpu = policy->cpu;
> > +    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> > +    bool cleared_act_window = false;
> > +
> > +    if ( data == NULL )
> > +        return -EINVAL;
>
> I don't think EINVAL is appropriate here. EOPNOTSUPP might be, or ENOENT,
> or EIO, or perhaps a few others.

Yes.  ENOENT seems good here since a NULL data is comparable to not existing.

> > +    /* Validate all parameters - Disallow reserved bits. */
> > +    if ( set_cppc->minimum > 255 ||
> > +         set_cppc->maximum > 255 ||
> > +         set_cppc->desired > 255 ||
> > +         set_cppc->energy_perf > 255 ||
> > +         set_cppc->set_params & ~XEN_SYSCTL_CPPC_SET_PARAM_MASK ||
> > +         set_cppc->activity_window & ~XEN_SYSCTL_CPPC_ACT_WINDOW_MASK )
>
> Nit: Parentheses again please around the operands of &.

Sure

> > +        return -EINVAL;
> > +
> > +    /* Only allow values if params bit is set. */
> > +    if ( (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED) &&
> > +          set_cppc->desired) ||
> > +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM) &&
> > +          set_cppc->minimum) ||
> > +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM) &&
> > +          set_cppc->maximum) ||
> > +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF) &&
> > +          set_cppc->energy_perf) ||
> > +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW) &&
> > +          set_cppc->activity_window) )
> > +        return -EINVAL;
> > +
> > +    /* Clear out activity window if lacking HW supported. */
> > +    if ( (set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW) &&
> > +         !feature_hwp_activity_window ) {
> > +        set_cppc->set_params &= ~XEN_SYSCTL_CPPC_SET_ACT_WINDOW;
> > +        cleared_act_window = true;
> > +    }
> > +
> > +    /* Return if there is nothing to do. */
> > +    if ( set_cppc->set_params == 0 )
> > +        return cleared_act_window ? 0 : -EINVAL;
>
> Is it really necessary to return an error when there's nothing to do?
> We have various hypercalls which can degenerate to no-ops under
> certain conditions, and which simply return success then.

With all the earlier parameter checking, I think it would be fine to
return success here for a no-op.

> > --- a/xen/drivers/acpi/pmstat.c
> > +++ b/xen/drivers/acpi/pmstat.c
> > @@ -400,6 +400,19 @@ static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
> >      return ret;
> >  }
> >
> > +static int set_cpufreq_cppc(struct xen_sysctl_pm_op *op)
> > +{
> > +    struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
> > +
> > +    if ( !policy || !policy->governor )
> > +        return -EINVAL;
> > +
> > +    if ( !hwp_active() )
> > +        return -EINVAL;
>
> In both cases I again wonder in how far EINVAL is really appropriate.

-EOPNOTSUPP seems good for the !hwp_active() case.  Maybe ENOENT for
the policy one.

> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -351,6 +351,68 @@ struct xen_cppc_para {
> >      uint32_t activity_window;
> >  };
> >
> > +/*
> > + * Set CPPC values.
> > + *
> > + * Configure the parameters for CPPC.  Set bits in set_params control which
> > + * values are applied.  If a bit is not set in set_params, the field must be
> > + * zero.
> > + *
> > + * For HWP specifically, values must be limited to 0-255 or within
> > + * XEN_SYSCTL_CPPC_ACT_WINDOW_MASK for activity window.  Set bits outside the
> > + * range will be returned as -EINVAL.
> > + *
> > + * Activity Window may not be supported by the hardware.  In that case, the
> > + * returned set_params will clear XEN_SYSCTL_CPPC_SET_ACT_WINDOW to indicate
> > + * that it was not applied - though the rest of the values will be applied.
> > + *
> > + * There are a set of presets along with individual fields.  Presets are
> > + * applied first, and then individual fields.  This allows customizing
> > + * a preset without having to specify every value.
> > + *
> > + * The preset options values are as follows:
> > + *
> > + * preset      | minimum | maxium  | energy_perf
> > + * ------------+---------+---------+----------------
> > + * powersave   | lowest  | lowest  | powersave (255)
> > + * ------------+---------+---------+----------------
> > + * balance     | lowest  | highest | balance (128)
> > + * ------------+---------+---------+----------------
> > + * performance | highest | highest | performance (0)
> > + *
> > + * desired and activity_window are set to 0, hardware selected.
> > + */
> > +struct xen_set_cppc_para {
> > +#define XEN_SYSCTL_CPPC_SET_MINIMUM              (1U << 0)
> > +#define XEN_SYSCTL_CPPC_SET_MAXIMUM              (1U << 1)
> > +#define XEN_SYSCTL_CPPC_SET_DESIRED              (1U << 2)
> > +#define XEN_SYSCTL_CPPC_SET_ENERGY_PERF          (1U << 3)
> > +#define XEN_SYSCTL_CPPC_SET_ACT_WINDOW           (1U << 4)
> > +#define XEN_SYSCTL_CPPC_SET_PRESET_MASK          0xf0000000
> > +#define XEN_SYSCTL_CPPC_SET_PRESET_NONE          0x00000000
> > +#define XEN_SYSCTL_CPPC_SET_PRESET_BALANCE       0x10000000
> > +#define XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE     0x20000000
> > +#define XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE   0x30000000
>
> As corrections for the respective Misra rule are in the process of
> being merged, please add U suffixes here (at the very least on the
> _MASK).

Sure.

Thanks,
Jason
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index 50b66a0449..32df9af4b3 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -21,6 +21,10 @@  static bool __ro_after_init feature_hdc;
 
 static bool __ro_after_init opt_cpufreq_hdc = true;
 
+#define HWP_ENERGY_PERF_MAX_PERFORMANCE 0
+#define HWP_ENERGY_PERF_BALANCE         0x80
+#define HWP_ENERGY_PERF_MAX_POWERSAVE   0xff
+
 union hwp_request
 {
     struct
@@ -531,6 +535,103 @@  int get_hwp_para(unsigned int cpu,
     return 0;
 }
 
+int set_hwp_para(struct cpufreq_policy *policy,
+                 struct xen_set_cppc_para *set_cppc)
+{
+    unsigned int cpu = policy->cpu;
+    struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+    bool cleared_act_window = false;
+
+    if ( data == NULL )
+        return -EINVAL;
+
+    /* Validate all parameters - Disallow reserved bits. */
+    if ( set_cppc->minimum > 255 ||
+         set_cppc->maximum > 255 ||
+         set_cppc->desired > 255 ||
+         set_cppc->energy_perf > 255 ||
+         set_cppc->set_params & ~XEN_SYSCTL_CPPC_SET_PARAM_MASK ||
+         set_cppc->activity_window & ~XEN_SYSCTL_CPPC_ACT_WINDOW_MASK )
+        return -EINVAL;
+
+    /* Only allow values if params bit is set. */
+    if ( (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED) &&
+          set_cppc->desired) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM) &&
+          set_cppc->minimum) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM) &&
+          set_cppc->maximum) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF) &&
+          set_cppc->energy_perf) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW) &&
+          set_cppc->activity_window) )
+        return -EINVAL;
+
+    /* Clear out activity window if lacking HW support. */
+    if ( (set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW) &&
+         !feature_hwp_activity_window ) {
+        set_cppc->set_params &= ~XEN_SYSCTL_CPPC_SET_ACT_WINDOW;
+        cleared_act_window = true;
+    }
+
+    /* Return if there is nothing to do. */
+    if ( set_cppc->set_params == 0 )
+        return cleared_act_window ? 0 : -EINVAL;
+
+    /* Apply presets */
+    switch ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_PRESET_MASK )
+    {
+    case XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE:
+        data->minimum = data->hw.lowest;
+        data->maximum = data->hw.lowest;
+        data->activity_window = 0;
+        data->energy_perf = HWP_ENERGY_PERF_MAX_POWERSAVE;
+        data->desired = 0;
+        break;
+
+    case XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE:
+        data->minimum = data->hw.highest;
+        data->maximum = data->hw.highest;
+        data->activity_window = 0;
+        data->energy_perf = HWP_ENERGY_PERF_MAX_PERFORMANCE;
+        data->desired = 0;
+        break;
+
+    case XEN_SYSCTL_CPPC_SET_PRESET_BALANCE:
+        data->minimum = data->hw.lowest;
+        data->maximum = data->hw.highest;
+        data->activity_window = 0;
+        data->energy_perf = HWP_ENERGY_PERF_BALANCE;
+        data->desired = 0;
+        break;
+
+    case XEN_SYSCTL_CPPC_SET_PRESET_NONE:
+        break;
+
+    default:
+        return -EINVAL;
+    }
+
+    /* Further customize presets if needed */
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM )
+        data->minimum = set_cppc->minimum;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM )
+        data->maximum = set_cppc->maximum;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF )
+        data->energy_perf = set_cppc->energy_perf;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
+        data->desired = set_cppc->desired;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW )
+        data->activity_window = set_cppc->activity_window &
+                                XEN_SYSCTL_CPPC_ACT_WINDOW_MASK;
+
+    return hwp_cpufreq_target(policy, 0, 0);
+}
+
 int __init hwp_register_driver(void)
 {
     int ret;
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index f674ef51aa..cfd7fdfb1c 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -400,6 +400,19 @@  static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
     return ret;
 }
 
+static int set_cpufreq_cppc(struct xen_sysctl_pm_op *op)
+{
+    struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
+
+    if ( !policy || !policy->governor )
+        return -EINVAL;
+
+    if ( !hwp_active() )
+        return -EINVAL;
+
+    return set_hwp_para(policy, &op->u.set_cppc);
+}
+
 int do_pm_op(struct xen_sysctl_pm_op *op)
 {
     int ret = 0;
@@ -472,6 +485,10 @@  int do_pm_op(struct xen_sysctl_pm_op *op)
         break;
     }
 
+    case SET_CPUFREQ_CPPC:
+        ret = set_cpufreq_cppc(op);
+        break;
+
     case GET_CPUFREQ_AVGFREQ:
     {
         op->u.get_avgfreq = cpufreq_driver_getavg(op->cpuid, USR_GETAVG);
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index c5636edf0e..c6b8c991b4 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -256,5 +256,7 @@  int hwp_register_driver(void);
 bool hwp_active(void);
 int get_hwp_para(unsigned int cpu,
                  struct xen_cppc_para *cppc_para);
+int set_hwp_para(struct cpufreq_policy *policy,
+                 struct xen_set_cppc_para *set_cppc);
 
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index a5f8369116..2457bf5e8f 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -351,6 +351,68 @@  struct xen_cppc_para {
     uint32_t activity_window;
 };
 
+/*
+ * Set CPPC values.
+ *
+ * Configure the parameters for CPPC.  Set bits in set_params control which
+ * values are applied.  If a bit is not set in set_params, the field must be
+ * zero.
+ *
+ * For HWP specifically, values must be limited to 0-255 or within
+ * XEN_SYSCTL_CPPC_ACT_WINDOW_MASK for activity window.  Requests with bits
+ * set outside those ranges fail with -EINVAL.
+ *
+ * Activity Window may not be supported by the hardware.  In that case, the
+ * returned set_params will clear XEN_SYSCTL_CPPC_SET_ACT_WINDOW to indicate
+ * that it was not applied - though the rest of the values will be applied.
+ *
+ * There is a set of presets along with individual fields.  Presets are
+ * applied first, and then individual fields.  This allows customizing
+ * a preset without having to specify every value.
+ *
+ * The preset options values are as follows:
+ *
+ * preset      | minimum | maximum | energy_perf
+ * ------------+---------+---------+----------------
+ * powersave   | lowest  | lowest  | powersave (255)
+ * ------------+---------+---------+----------------
+ * balance     | lowest  | highest | balance (128)
+ * ------------+---------+---------+----------------
+ * performance | highest | highest | performance (0)
+ *
+ * desired and activity_window are set to 0, hardware selected.
+ */
+struct xen_set_cppc_para {
+#define XEN_SYSCTL_CPPC_SET_MINIMUM              (1U << 0)
+#define XEN_SYSCTL_CPPC_SET_MAXIMUM              (1U << 1)
+#define XEN_SYSCTL_CPPC_SET_DESIRED              (1U << 2)
+#define XEN_SYSCTL_CPPC_SET_ENERGY_PERF          (1U << 3)
+#define XEN_SYSCTL_CPPC_SET_ACT_WINDOW           (1U << 4)
+#define XEN_SYSCTL_CPPC_SET_PRESET_MASK          0xf0000000
+#define XEN_SYSCTL_CPPC_SET_PRESET_NONE          0x00000000
+#define XEN_SYSCTL_CPPC_SET_PRESET_BALANCE       0x10000000
+#define XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE     0x20000000
+#define XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE   0x30000000
+#define XEN_SYSCTL_CPPC_SET_PARAM_MASK \
+        (XEN_SYSCTL_CPPC_SET_PRESET_MASK | \
+         XEN_SYSCTL_CPPC_SET_MINIMUM     | \
+         XEN_SYSCTL_CPPC_SET_MAXIMUM     | \
+         XEN_SYSCTL_CPPC_SET_DESIRED     | \
+         XEN_SYSCTL_CPPC_SET_ENERGY_PERF | \
+         XEN_SYSCTL_CPPC_SET_ACT_WINDOW  )
+    /* IN/OUT */
+    uint32_t set_params; /* bitflags for valid values */
+    /* See comments in struct xen_cppc_para. */
+    /* IN */
+    uint32_t minimum;
+    uint32_t maximum;
+    uint32_t desired;
+    uint32_t energy_perf;
+#define XEN_SYSCTL_CPPC_ACT_WINDOW_MASK (XEN_CPPC_ACT_WINDOW_MANTISSA_MASK | \
+                                         XEN_CPPC_ACT_WINDOW_EXPONENT_MASK)
+    uint32_t activity_window;
+};
+
 #define XEN_HWP_DRIVER_NAME "hwp"
 
 /*
@@ -418,6 +480,7 @@  struct xen_sysctl_pm_op {
     #define SET_CPUFREQ_GOV            (CPUFREQ_PARA | 0x02)
     #define SET_CPUFREQ_PARA           (CPUFREQ_PARA | 0x03)
     #define GET_CPUFREQ_AVGFREQ        (CPUFREQ_PARA | 0x04)
+    #define SET_CPUFREQ_CPPC           (CPUFREQ_PARA | 0x05)
 
     /* set/reset scheduler power saving option */
     #define XEN_SYSCTL_pm_op_set_sched_opt_smt    0x21
@@ -444,6 +507,7 @@  struct xen_sysctl_pm_op {
         struct xen_get_cpufreq_para get_para;
         struct xen_set_cpufreq_gov  set_gov;
         struct xen_set_cpufreq_para set_para;
+        struct xen_set_cppc_para    set_cppc;
         uint64_aligned_t get_avgfreq;
         uint32_t                    set_sched_opt_smt;
 #define XEN_SYSCTL_CX_UNLIMITED 0xffffffff