diff mbox series

[v6,09/12] xen: add runtime parameter access support to hypfs

Message ID 20200226124705.29212-10-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series Add hypervisor sysfs-like support | expand

Commit Message

Jürgen Groß Feb. 26, 2020, 12:47 p.m. UTC
Add support to read and modify values of hypervisor runtime parameters
via the hypervisor file system.

As runtime parameters can be modified via a sysctl, too, this path has
to take the hypfs rw_lock as writer.

For custom runtime parameters the connection between the parameter
value and the file system is done via an init function which will set
the initial value (if needed) and the leaf properties.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- complete rework
- support custom parameters, too
- support parameter writing

V6:
- rewording in docs/misc/hypfs-paths.pandoc (Jan Beulich)
- use memchr() (Jan Beulich)
- use strlcat() (Jan Beulich)
- rework to use a custom parameter init function instead of a reference
  to a content variable, allowing to drop default strings
- style correction (Jan Beulich)
- dropping param_append_str() in favor of a custom function at its only
  use site
---
 docs/misc/hypfs-paths.pandoc |  9 +++++
 xen/arch/arm/xen.lds.S       |  5 +++
 xen/arch/x86/hvm/vmx/vmcs.c  | 30 ++++++++++++++++-
 xen/arch/x86/pv/domain.c     | 26 +++++++++++++--
 xen/arch/x86/xen.lds.S       |  5 +++
 xen/common/grant_table.c     | 37 ++++++++++++++++-----
 xen/common/hypfs.c           | 38 +++++++++++++++++++++
 xen/common/kernel.c          | 27 ++++++++++++++-
 xen/drivers/char/console.c   | 66 +++++++++++++++++++++++++++++++++----
 xen/include/xen/hypfs.h      |  4 +++
 xen/include/xen/param.h      | 78 +++++++++++++++++++++++++++++++++++++++-----
 11 files changed, 299 insertions(+), 26 deletions(-)

Comments

Jan Beulich March 4, 2020, 11:32 a.m. UTC | #1
On 26.02.2020 13:47, Juergen Gross wrote:
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -70,6 +70,30 @@ integer_param("ple_window", ple_window);
>  static bool __read_mostly opt_ept_pml = true;
>  static s8 __read_mostly opt_ept_ad = -1;
>  int8_t __read_mostly opt_ept_exec_sp = -1;
> +static char opt_ept_setting[16];

I don't think this is quite big enough.

> +static void update_ept_param_append(const char *str, int val)
> +{
> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
> +
> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
> +             ",%s=%d", str, val);
> +}
> +
> +static void update_ept_param(void)
> +{
> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
> +    if ( opt_ept_ad >= 0 )
> +        update_ept_param_append("ad", opt_ept_ad);

This won't correctly reflect reality: If you look at
vmx_init_vmcs_config(), even a negative value means "true" here,
unless on a specific Atom model. I think init_ept_param() wants
to have that erratum workaround logic moved there, such that
you can then assme the value to be non-negative here.

> +    if ( opt_ept_exec_sp >= 0 )
> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);

I agree for this one - if the value is still -1, it has neither
been set nor is its value of any interest.

> +static void __init init_ept_param(struct param_hypfs *par)
> +{
> +    custom_runtime_set_var(par, opt_ept_setting);
> +    update_ept_param();
> +}
>  
>  static int __init parse_ept_param(const char *s)
>  {
> @@ -93,6 +117,8 @@ static int __init parse_ept_param(const char *s)
>          s = ss + 1;
>      } while ( *ss );
>  
> +    update_ept_param();

Isn't this redundant with the use in init_ept_param() (or the
other way around - there should be clear ordering between the
command line parsing functions and the param-init ones, I would
suppose)?

> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -20,8 +20,27 @@ static __read_mostly enum {
>      PCID_OFF,
>      PCID_ALL,
>      PCID_XPTI,
> -    PCID_NOXPTI
> +    PCID_NOXPTI,
> +    PCID_END
>  } opt_pcid = PCID_XPTI;
> +static const char *opt_pcid_2_string[PCID_END] = {

You either want another const here, or (more space efficient) you
want to use const char[PCID_END][7].

> +    [PCID_OFF] = "off",
> +    [PCID_ALL] = "on",
> +    [PCID_XPTI] = "xpti",
> +    [PCID_NOXPTI] = "noxpti"
> +};
> +static char opt_pcid_val[7];
> +
> +static void update_opt_pcid(void)
> +{
> +    strlcpy(opt_pcid_val, opt_pcid_2_string[opt_pcid], sizeof(opt_pcid_val));

Instead of copying, couldn't you make the hypfs entry point
into the array above, by using custom_runtime_set_var() here?

> @@ -55,9 +74,12 @@ static int parse_pcid(const char *s)
>          break;
>      }
>  
> +    if ( !rc )
> +        update_opt_pcid();

Personally I'd avoid the if() here - there's no harm updating
the hypfs entry anyway.

> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -85,8 +85,10 @@ struct grant_table {
>      struct grant_table_arch arch;
>  };
>  
> -static int parse_gnttab_limit(const char *param, const char *arg,
> -                              unsigned int *valp)
> +#define GRANT_CUSTOM_VAL_SZ  12
> +
> +static int parse_gnttab_limit(const char *arg, unsigned int *valp,
> +                              char *parval)
>  {
>      const char *e;
>      unsigned long val;
> @@ -99,28 +101,47 @@ static int parse_gnttab_limit(const char *param, const char *arg,
>          return -ERANGE;
>  
>      *valp = val;
> +    snprintf(parval, GRANT_CUSTOM_VAL_SZ, "%lu", val);
>  
>      return 0;
>  }
>  
>  unsigned int __read_mostly opt_max_grant_frames = 64;
> +static char __read_mostly opt_max_grant_frames_val[GRANT_CUSTOM_VAL_SZ];
> +
> +static void __init gnttab_max_frames_init(struct param_hypfs *par)
> +{
> +    custom_runtime_set_var(par, opt_max_grant_frames_val);

You still use a custom string buffer here. Can this "set-var"
operation record that the variable (for presentation purposes)
is simply of UINT type, handing a pointer to the actual
variable?

> --- a/xen/common/hypfs.c
> +++ b/xen/common/hypfs.c
> @@ -10,6 +10,7 @@
>  #include <xen/hypercall.h>
>  #include <xen/hypfs.h>
>  #include <xen/lib.h>
> +#include <xen/param.h>
>  #include <xen/rwlock.h>
>  #include <public/hypfs.h>
>  
> @@ -281,6 +282,33 @@ int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>      return 0;
>  }
>  
> +int hypfs_write_custom(struct hypfs_entry_leaf *leaf,
> +                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
> +{
> +    struct param_hypfs *p;
> +    char *buf;
> +    int ret;
> +
> +    buf = xzalloc_array(char, ulen);
> +    if ( !buf )
> +        return -ENOMEM;
> +
> +    ret = -EFAULT;
> +    if ( copy_from_guest(buf, uaddr, ulen) )
> +        goto out;
> +
> +    ret = -EDOM;
> +    if ( !memchr(buf, 0, ulen) )

Once again " != buf + ulen - 1"? (EDOM also looks like an odd
error code to use in this case, but I guess there's no really
good one.)

> --- a/xen/drivers/char/console.c
> +++ b/xen/drivers/char/console.c
> @@ -75,12 +75,36 @@ enum con_timestamp_mode
>      TSM_DATE_MS,       /* [YYYY-MM-DD HH:MM:SS.mmm] */
>      TSM_BOOT,          /* [SSSSSS.uuuuuu] */
>      TSM_RAW,           /* [XXXXXXXXXXXXXXXX] */
> +    TSM_END
> +};
> +
> +static const char *con_timestamp_mode_2_string[TSM_END] = {
> +    [TSM_NONE] = "none",
> +    [TSM_DATE] = "date",
> +    [TSM_DATE_MS] = "datems",
> +    [TSM_BOOT] = "boot",
> +    [TSM_RAW] = "raw"
>  };
>  
>  static enum con_timestamp_mode __read_mostly opt_con_timestamp_mode = TSM_NONE;
> +static char con_timestamp_mode_val[7];
> +
> +static void update_con_timestamp_mode(void)
> +{
> +    strlcpy(con_timestamp_mode_val,
> +            con_timestamp_mode_2_string[opt_con_timestamp_mode],
> +            sizeof(con_timestamp_mode_val));
> +}
> +
> +static void __init con_timestamp_mode_init(struct param_hypfs *par)
> +{
> +    custom_runtime_set_var(par, con_timestamp_mode_val);
> +    update_con_timestamp_mode();
> +}
>  
>  static int parse_console_timestamps(const char *s);
> -custom_runtime_param("console_timestamps", parse_console_timestamps);
> +custom_runtime_param("console_timestamps", parse_console_timestamps,
> +                     con_timestamp_mode_init);

Same remark as for the PCID option, and then also for the log level
ones further down. My main concern with how things are currently is
that the amount of logic needed for custom params seems overly
large.

> @@ -79,41 +88,94 @@ extern const struct kernel_param __param_start[], __param_end[];
>            .type = OPT_IGNORE }
>  
>  #define __rtparam         __param(__dataparam)
> +#define __paramfs         static __paramhypfs \
> +    __attribute__((__aligned__(sizeof(void *)))) struct param_hypfs
>  
> -#define custom_runtime_only_param(_name, _var) \
> +#define custom_runtime_set_var(parfs, var) \
> +    { \
> +        (parfs)->hypfs.write_ptr = &(var); \
> +        (parfs)->hypfs.e.size = sizeof(var); \

All users of this use char[]. Why sizeof() rather than strlen(),
and why taking the address instead of enforcing this to be of
(at least) array (potentially also "of char") type? Do you
envision this to be needed for anything where the value isn't
in string form, but still needs dynamically calculating? (As per
above there may already be cases where non-string variables may
want passing into here.)

Jan
Jürgen Groß March 4, 2020, 3:07 p.m. UTC | #2
On 04.03.20 12:32, Jan Beulich wrote:
> On 26.02.2020 13:47, Juergen Gross wrote:
>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>> @@ -70,6 +70,30 @@ integer_param("ple_window", ple_window);
>>   static bool __read_mostly opt_ept_pml = true;
>>   static s8 __read_mostly opt_ept_ad = -1;
>>   int8_t __read_mostly opt_ept_exec_sp = -1;
>> +static char opt_ept_setting[16];
> 
> I don't think this is quite big enough.

Yes, you are right.

> 
>> +static void update_ept_param_append(const char *str, int val)
>> +{
>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>> +
>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>> +             ",%s=%d", str, val);
>> +}
>> +
>> +static void update_ept_param(void)
>> +{
>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>> +    if ( opt_ept_ad >= 0 )
>> +        update_ept_param_append("ad", opt_ept_ad);
> 
> This won't correctly reflect reality: If you look at
> vmx_init_vmcs_config(), even a negative value means "true" here,
> unless on a specific Atom model. I think init_ept_param() wants
> to have that erratum workaround logic moved there, such that
> you can then assme the value to be non-negative here.

But isn't not mentioning it in the -1 case correct? -1 means: do the
correct thing on the current hardware.

In case we want to report an explicit value for "ad" we should add a
node for that purpose.

>> +    if ( opt_ept_exec_sp >= 0 )
>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
> 
> I agree for this one - if the value is still -1, it has neither
> been set nor is its value of any interest.
> 
>> +static void __init init_ept_param(struct param_hypfs *par)
>> +{
>> +    custom_runtime_set_var(par, opt_ept_setting);
>> +    update_ept_param();
>> +}
>>   
>>   static int __init parse_ept_param(const char *s)
>>   {
>> @@ -93,6 +117,8 @@ static int __init parse_ept_param(const char *s)
>>           s = ss + 1;
>>       } while ( *ss );
>>   
>> +    update_ept_param();
> 
> Isn't this redundant with the use in init_ept_param() (or the
> other way around - there should be clear ordering between the
> command line parsing functions and the param-init ones, I would
> suppose)?

You are right. I can drop this call, as the param-init call will
come later.

> 
>> --- a/xen/arch/x86/pv/domain.c
>> +++ b/xen/arch/x86/pv/domain.c
>> @@ -20,8 +20,27 @@ static __read_mostly enum {
>>       PCID_OFF,
>>       PCID_ALL,
>>       PCID_XPTI,
>> -    PCID_NOXPTI
>> +    PCID_NOXPTI,
>> +    PCID_END
>>   } opt_pcid = PCID_XPTI;
>> +static const char *opt_pcid_2_string[PCID_END] = {
> 
> You either want another const here, or (more space efficient) you
> want to use const char[PCID_END][7].

Ah, right, good idea.

> 
>> +    [PCID_OFF] = "off",
>> +    [PCID_ALL] = "on",
>> +    [PCID_XPTI] = "xpti",
>> +    [PCID_NOXPTI] = "noxpti"
>> +};
>> +static char opt_pcid_val[7];
>> +
>> +static void update_opt_pcid(void)
>> +{
>> +    strlcpy(opt_pcid_val, opt_pcid_2_string[opt_pcid], sizeof(opt_pcid_val));
> 
> Instead of copying, couldn't you make the hypfs entry point
> into the array above, by using custom_runtime_set_var() here?

Hmm, probably yes.

> 
>> @@ -55,9 +74,12 @@ static int parse_pcid(const char *s)
>>           break;
>>       }
>>   
>> +    if ( !rc )
>> +        update_opt_pcid();
> 
> Personally I'd avoid the if() here - there's no harm updating
> the hypfs entry anyway.

Okay.

> 
>> --- a/xen/common/grant_table.c
>> +++ b/xen/common/grant_table.c
>> @@ -85,8 +85,10 @@ struct grant_table {
>>       struct grant_table_arch arch;
>>   };
>>   
>> -static int parse_gnttab_limit(const char *param, const char *arg,
>> -                              unsigned int *valp)
>> +#define GRANT_CUSTOM_VAL_SZ  12
>> +
>> +static int parse_gnttab_limit(const char *arg, unsigned int *valp,
>> +                              char *parval)
>>   {
>>       const char *e;
>>       unsigned long val;
>> @@ -99,28 +101,47 @@ static int parse_gnttab_limit(const char *param, const char *arg,
>>           return -ERANGE;
>>   
>>       *valp = val;
>> +    snprintf(parval, GRANT_CUSTOM_VAL_SZ, "%lu", val);
>>   
>>       return 0;
>>   }
>>   
>>   unsigned int __read_mostly opt_max_grant_frames = 64;
>> +static char __read_mostly opt_max_grant_frames_val[GRANT_CUSTOM_VAL_SZ];
>> +
>> +static void __init gnttab_max_frames_init(struct param_hypfs *par)
>> +{
>> +    custom_runtime_set_var(par, opt_max_grant_frames_val);
> 
> You still use a custom string buffer here. Can this "set-var"
> operation record that the variable (for presentation purposes)
> is simply of UINT type, handing a pointer to the actual
> variable?

No, this would result in the need to set a custom parameter via a
binary value passed in from user land. So I'd need to convert this
binary into a string to be parseable by the custom function.

> 
>> --- a/xen/common/hypfs.c
>> +++ b/xen/common/hypfs.c
>> @@ -10,6 +10,7 @@
>>   #include <xen/hypercall.h>
>>   #include <xen/hypfs.h>
>>   #include <xen/lib.h>
>> +#include <xen/param.h>
>>   #include <xen/rwlock.h>
>>   #include <public/hypfs.h>
>>   
>> @@ -281,6 +282,33 @@ int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>>       return 0;
>>   }
>>   
>> +int hypfs_write_custom(struct hypfs_entry_leaf *leaf,
>> +                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>> +{
>> +    struct param_hypfs *p;
>> +    char *buf;
>> +    int ret;
>> +
>> +    buf = xzalloc_array(char, ulen);
>> +    if ( !buf )
>> +        return -ENOMEM;
>> +
>> +    ret = -EFAULT;
>> +    if ( copy_from_guest(buf, uaddr, ulen) )
>> +        goto out;
>> +
>> +    ret = -EDOM;
>> +    if ( !memchr(buf, 0, ulen) )
> 
> Once again " != buf + ulen - 1"? (EDOM also looks like an odd
> error code to use in this case, but I guess there's no really
> good one.)

" != buf + ulen - 1" is a logical choice with the change of patch 4.

> 
>> --- a/xen/drivers/char/console.c
>> +++ b/xen/drivers/char/console.c
>> @@ -75,12 +75,36 @@ enum con_timestamp_mode
>>       TSM_DATE_MS,       /* [YYYY-MM-DD HH:MM:SS.mmm] */
>>       TSM_BOOT,          /* [SSSSSS.uuuuuu] */
>>       TSM_RAW,           /* [XXXXXXXXXXXXXXXX] */
>> +    TSM_END
>> +};
>> +
>> +static const char *con_timestamp_mode_2_string[TSM_END] = {
>> +    [TSM_NONE] = "none",
>> +    [TSM_DATE] = "date",
>> +    [TSM_DATE_MS] = "datems",
>> +    [TSM_BOOT] = "boot",
>> +    [TSM_RAW] = "raw"
>>   };
>>   
>>   static enum con_timestamp_mode __read_mostly opt_con_timestamp_mode = TSM_NONE;
>> +static char con_timestamp_mode_val[7];
>> +
>> +static void update_con_timestamp_mode(void)
>> +{
>> +    strlcpy(con_timestamp_mode_val,
>> +            con_timestamp_mode_2_string[opt_con_timestamp_mode],
>> +            sizeof(con_timestamp_mode_val));
>> +}
>> +
>> +static void __init con_timestamp_mode_init(struct param_hypfs *par)
>> +{
>> +    custom_runtime_set_var(par, con_timestamp_mode_val);
>> +    update_con_timestamp_mode();
>> +}
>>   
>>   static int parse_console_timestamps(const char *s);
>> -custom_runtime_param("console_timestamps", parse_console_timestamps);
>> +custom_runtime_param("console_timestamps", parse_console_timestamps,
>> +                     con_timestamp_mode_init);
> 
> Same remark as for the PCID option, and then also for the log level
> ones further down. My main concern with how things are currently is
> that the amount of logic needed for custom params seems overly
> large.
> 
>> @@ -79,41 +88,94 @@ extern const struct kernel_param __param_start[], __param_end[];
>>             .type = OPT_IGNORE }
>>   
>>   #define __rtparam         __param(__dataparam)
>> +#define __paramfs         static __paramhypfs \
>> +    __attribute__((__aligned__(sizeof(void *)))) struct param_hypfs
>>   
>> -#define custom_runtime_only_param(_name, _var) \
>> +#define custom_runtime_set_var(parfs, var) \
>> +    { \
>> +        (parfs)->hypfs.write_ptr = &(var); \
>> +        (parfs)->hypfs.e.size = sizeof(var); \
> 
> All users of this use char[]. Why sizeof() rather than strlen(),

That is the maximum string length. Otherwise I wouldn't know I am
allowed to replace e.g. "on" by "noxpti".

> and why taking the address instead of enforcing this to be of
> (at least) array (potentially also "of char") type? Do you
> envision this to be needed for anything where the value isn't
> in string form, but still needs dynamically calculating? (As per
> above there may already be cases where non-string variables may
> want passing into here.)

Ah, this is a leftover from the failed experiment to allow other
types than strings. I'll change it accordingly.


Juergen
Jan Beulich March 4, 2020, 3:19 p.m. UTC | #3
On 04.03.2020 16:07, Jürgen Groß wrote:
> On 04.03.20 12:32, Jan Beulich wrote:
>> On 26.02.2020 13:47, Juergen Gross wrote:
>>> +static void update_ept_param_append(const char *str, int val)
>>> +{
>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>> +
>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>> +             ",%s=%d", str, val);
>>> +}
>>> +
>>> +static void update_ept_param(void)
>>> +{
>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>> +    if ( opt_ept_ad >= 0 )
>>> +        update_ept_param_append("ad", opt_ept_ad);
>>
>> This won't correctly reflect reality: If you look at
>> vmx_init_vmcs_config(), even a negative value means "true" here,
>> unless on a specific Atom model. I think init_ept_param() wants
>> to have that erratum workaround logic moved there, such that
>> you can then assme the value to be non-negative here.
> 
> But isn't not mentioning it in the -1 case correct? -1 means: do the
> correct thing on the current hardware.

Well, I think the output here should represent effective settings,
and a sub-item should be suppressed only if a setting has no effect
at all in the current setup, like ...

>>> +    if ( opt_ept_exec_sp >= 0 )
>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>
>> I agree for this one - if the value is still -1, it has neither
>> been set nor is its value of any interest.

... here.

>>> --- a/xen/common/grant_table.c
>>> +++ b/xen/common/grant_table.c
>>> @@ -85,8 +85,10 @@ struct grant_table {
>>>       struct grant_table_arch arch;
>>>   };
>>>   
>>> -static int parse_gnttab_limit(const char *param, const char *arg,
>>> -                              unsigned int *valp)
>>> +#define GRANT_CUSTOM_VAL_SZ  12
>>> +
>>> +static int parse_gnttab_limit(const char *arg, unsigned int *valp,
>>> +                              char *parval)
>>>   {
>>>       const char *e;
>>>       unsigned long val;
>>> @@ -99,28 +101,47 @@ static int parse_gnttab_limit(const char *param, const char *arg,
>>>           return -ERANGE;
>>>   
>>>       *valp = val;
>>> +    snprintf(parval, GRANT_CUSTOM_VAL_SZ, "%lu", val);
>>>   
>>>       return 0;
>>>   }
>>>   
>>>   unsigned int __read_mostly opt_max_grant_frames = 64;
>>> +static char __read_mostly opt_max_grant_frames_val[GRANT_CUSTOM_VAL_SZ];
>>> +
>>> +static void __init gnttab_max_frames_init(struct param_hypfs *par)
>>> +{
>>> +    custom_runtime_set_var(par, opt_max_grant_frames_val);
>>
>> You still use a custom string buffer here. Can this "set-var"
>> operation record that the variable (for presentation purposes)
>> is simply of UINT type, handing a pointer to the actual
>> variable?
> 
> No, this would result in the need to set a custom parameter via a
> binary value passed in from user land. So I'd need to convert this
> binary into a string to be parseable by the custom function.

Hmm, not very fortunate, but I can see what you're saying.

>>> --- a/xen/common/hypfs.c
>>> +++ b/xen/common/hypfs.c
>>> @@ -10,6 +10,7 @@
>>>   #include <xen/hypercall.h>
>>>   #include <xen/hypfs.h>
>>>   #include <xen/lib.h>
>>> +#include <xen/param.h>
>>>   #include <xen/rwlock.h>
>>>   #include <public/hypfs.h>
>>>   
>>> @@ -281,6 +282,33 @@ int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>>>       return 0;
>>>   }
>>>   
>>> +int hypfs_write_custom(struct hypfs_entry_leaf *leaf,
>>> +                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>> +{
>>> +    struct param_hypfs *p;
>>> +    char *buf;
>>> +    int ret;
>>> +
>>> +    buf = xzalloc_array(char, ulen);
>>> +    if ( !buf )
>>> +        return -ENOMEM;
>>> +
>>> +    ret = -EFAULT;
>>> +    if ( copy_from_guest(buf, uaddr, ulen) )
>>> +        goto out;
>>> +
>>> +    ret = -EDOM;
>>> +    if ( !memchr(buf, 0, ulen) )
>>
>> Once again " != buf + ulen - 1"? (EDOM also looks like an odd
>> error code to use in this case, but I guess there's no really
>> good one.)
> 
> " != buf + ulen - 1" is a logical choice with the change of patch 4.

I'm afraid I don't understand. You want to parse a string here.
The caller should tell you what the string length is (including
the nul again), not what its buffer size may be.

>>> @@ -79,41 +88,94 @@ extern const struct kernel_param __param_start[], __param_end[];
>>>             .type = OPT_IGNORE }
>>>   
>>>   #define __rtparam         __param(__dataparam)
>>> +#define __paramfs         static __paramhypfs \
>>> +    __attribute__((__aligned__(sizeof(void *)))) struct param_hypfs
>>>   
>>> -#define custom_runtime_only_param(_name, _var) \
>>> +#define custom_runtime_set_var(parfs, var) \
>>> +    { \
>>> +        (parfs)->hypfs.write_ptr = &(var); \
>>> +        (parfs)->hypfs.e.size = sizeof(var); \
>>
>> All users of this use char[]. Why sizeof() rather than strlen(),
> 
> That is the maximum string length. Otherwise I wouldn't know I am
> allowed to replace e.g. "on" by "noxpti".

As said elsewhere - if e.size is the buffer size, then the
reading function wants adjusting, and it needs to be clarified
how buffer size and payload size can be told apart for BLOBs.

Jan
Jürgen Groß March 4, 2020, 4:31 p.m. UTC | #4
On 04.03.20 16:19, Jan Beulich wrote:
> On 04.03.2020 16:07, Jürgen Groß wrote:
>> On 04.03.20 12:32, Jan Beulich wrote:
>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>> +static void update_ept_param_append(const char *str, int val)
>>>> +{
>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>> +
>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>> +             ",%s=%d", str, val);
>>>> +}
>>>> +
>>>> +static void update_ept_param(void)
>>>> +{
>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>> +    if ( opt_ept_ad >= 0 )
>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>
>>> This won't correctly reflect reality: If you look at
>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>> unless on a specific Atom model. I think init_ept_param() wants
>>> to have that erratum workaround logic moved there, such that
>>> you can then assme the value to be non-negative here.
>>
>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>> correct thing on the current hardware.
> 
> Well, I think the output here should represent effective settings,

The minimum requirement is to reflect the effective parameters, like
cmdline is doing for boot-time only parameters. With runtime parameters
we had no way of telling what was set, and this is now possible.

> and a sub-item should be suppressed only if a setting has no effect
> at all in the current setup, like ...
> 
>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>
>>> I agree for this one - if the value is still -1, it has neither
>>> been set nor is its value of any interest.
> 
> ... here.

I think we should not mix up specified parameters and effective
settings. In case an effective setting is of common interest it should
be reported via a specific node (like e.g. specific mitigation settings
where the cmdline is not providing enough details).

> 
>>>> --- a/xen/common/grant_table.c
>>>> +++ b/xen/common/grant_table.c
>>>> @@ -85,8 +85,10 @@ struct grant_table {
>>>>        struct grant_table_arch arch;
>>>>    };
>>>>    
>>>> -static int parse_gnttab_limit(const char *param, const char *arg,
>>>> -                              unsigned int *valp)
>>>> +#define GRANT_CUSTOM_VAL_SZ  12
>>>> +
>>>> +static int parse_gnttab_limit(const char *arg, unsigned int *valp,
>>>> +                              char *parval)
>>>>    {
>>>>        const char *e;
>>>>        unsigned long val;
>>>> @@ -99,28 +101,47 @@ static int parse_gnttab_limit(const char *param, const char *arg,
>>>>            return -ERANGE;
>>>>    
>>>>        *valp = val;
>>>> +    snprintf(parval, GRANT_CUSTOM_VAL_SZ, "%lu", val);
>>>>    
>>>>        return 0;
>>>>    }
>>>>    
>>>>    unsigned int __read_mostly opt_max_grant_frames = 64;
>>>> +static char __read_mostly opt_max_grant_frames_val[GRANT_CUSTOM_VAL_SZ];
>>>> +
>>>> +static void __init gnttab_max_frames_init(struct param_hypfs *par)
>>>> +{
>>>> +    custom_runtime_set_var(par, opt_max_grant_frames_val);
>>>
>>> You still use a custom string buffer here. Can this "set-var"
>>> operation record that the variable (for presentation purposes)
>>> is simply of UINT type, handing a pointer to the actual
>>> variable?
>>
>> No, this would result in the need to set a custom parameter via a
>> binary value passed in from user land. So I'd need to convert this
>> binary into a string to be parseable by the custom function.
> 
> Hmm, not very fortunate, but I can see what you're saying.
> 
>>>> --- a/xen/common/hypfs.c
>>>> +++ b/xen/common/hypfs.c
>>>> @@ -10,6 +10,7 @@
>>>>    #include <xen/hypercall.h>
>>>>    #include <xen/hypfs.h>
>>>>    #include <xen/lib.h>
>>>> +#include <xen/param.h>
>>>>    #include <xen/rwlock.h>
>>>>    #include <public/hypfs.h>
>>>>    
>>>> @@ -281,6 +282,33 @@ int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>>>>        return 0;
>>>>    }
>>>>    
>>>> +int hypfs_write_custom(struct hypfs_entry_leaf *leaf,
>>>> +                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>>> +{
>>>> +    struct param_hypfs *p;
>>>> +    char *buf;
>>>> +    int ret;
>>>> +
>>>> +    buf = xzalloc_array(char, ulen);
>>>> +    if ( !buf )
>>>> +        return -ENOMEM;
>>>> +
>>>> +    ret = -EFAULT;
>>>> +    if ( copy_from_guest(buf, uaddr, ulen) )
>>>> +        goto out;
>>>> +
>>>> +    ret = -EDOM;
>>>> +    if ( !memchr(buf, 0, ulen) )
>>>
>>> Once again " != buf + ulen - 1"? (EDOM also looks like an odd
>>> error code to use in this case, but I guess there's no really
>>> good one.)
>>
>> " != buf + ulen - 1" is a logical choice with the change of patch 4.
> 
> I'm afraid I don't understand. You want to parse a string here.
> The caller should tell you what the string length is (including
> the nul again), not what its buffer size may be.

I agreed that changing to " != buf + ulen - 1" makes sense as I
agreed already to do so in patch 4.

> 
>>>> @@ -79,41 +88,94 @@ extern const struct kernel_param __param_start[], __param_end[];
>>>>              .type = OPT_IGNORE }
>>>>    
>>>>    #define __rtparam         __param(__dataparam)
>>>> +#define __paramfs         static __paramhypfs \
>>>> +    __attribute__((__aligned__(sizeof(void *)))) struct param_hypfs
>>>>    
>>>> -#define custom_runtime_only_param(_name, _var) \
>>>> +#define custom_runtime_set_var(parfs, var) \
>>>> +    { \
>>>> +        (parfs)->hypfs.write_ptr = &(var); \
>>>> +        (parfs)->hypfs.e.size = sizeof(var); \
>>>
>>> All users of this use char[]. Why sizeof() rather than strlen(),
>>
>> That is the maximum string length. Otherwise I wouldn't know I am
>> allowed to replace e.g. "on" by "noxpti".
> 
> As said elsewhere - if e.size is the buffer size, then the
> reading function wants adjusting, and it needs to be clarified
> how buffer size and payload size can be told apart for BLOBs.

Okay, I'll adjust the reading size to copy only strlen() + 1 bytes
and add a comment that BLOBs need blob-specific write and read
functions in the common case.


Juergen
Jan Beulich March 4, 2020, 4:56 p.m. UTC | #5
On 04.03.2020 17:31, Jürgen Groß wrote:
> On 04.03.20 16:19, Jan Beulich wrote:
>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>> +{
>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>> +
>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>> +             ",%s=%d", str, val);
>>>>> +}
>>>>> +
>>>>> +static void update_ept_param(void)
>>>>> +{
>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>> +    if ( opt_ept_ad >= 0 )
>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>
>>>> This won't correctly reflect reality: If you look at
>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>> to have that erratum workaround logic moved there, such that
>>>> you can then assme the value to be non-negative here.
>>>
>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>> correct thing on the current hardware.
>>
>> Well, I think the output here should represent effective settings,
> 
> The minimum requirement is to reflect the effective parameters, like
> cmdline is doing for boot-time only parameters. With runtime parameters
> we had no way of telling what was set, and this is now possible.
> 
>> and a sub-item should be suppressed only if a setting has no effect
>> at all in the current setup, like ...
>>
>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>
>>>> I agree for this one - if the value is still -1, it has neither
>>>> been set nor is its value of any interest.
>>
>> ... here.
> 
> I think we should not mix up specified parameters and effective
> settings. In case an effective setting is of common interest it should
> be reported via a specific node (like e.g. specific mitigation settings
> where the cmdline is not providing enough details).

But then a boolean option that wasn't specified on the command line
should produce no output at all. And hence we'd need a way to tell
whether an option was set from command line for _all_ of them. I
don't think this would be very helpful.

I'm curious if anyone else has any opinion either way (or yet
another one) here:

Jan
Jürgen Groß March 5, 2020, 6:01 a.m. UTC | #6
On 04.03.20 17:56, Jan Beulich wrote:
> On 04.03.2020 17:31, Jürgen Groß wrote:
>> On 04.03.20 16:19, Jan Beulich wrote:
>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>> +{
>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>> +
>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>> +             ",%s=%d", str, val);
>>>>>> +}
>>>>>> +
>>>>>> +static void update_ept_param(void)
>>>>>> +{
>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>
>>>>> This won't correctly reflect reality: If you look at
>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>> to have that erratum workaround logic moved there, such that
>>>>> you can then assme the value to be non-negative here.
>>>>
>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>> correct thing on the current hardware.
>>>
>>> Well, I think the output here should represent effective settings,
>>
>> The minimum requirement is to reflect the effective parameters, like
>> cmdline is doing for boot-time only parameters. With runtime parameters
>> we had no way of telling what was set, and this is now possible.
>>
>>> and a sub-item should be suppressed only if a setting has no effect
>>> at all in the current setup, like ...
>>>
>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>
>>>>> I agree for this one - if the value is still -1, it has neither
>>>>> been set nor is its value of any interest.
>>>
>>> ... here.
>>
>> I think we should not mix up specified parameters and effective
>> settings. In case an effective setting is of common interest it should
>> be reported via a specific node (like e.g. specific mitigation settings
>> where the cmdline is not providing enough details).
> 
> But then a boolean option that wasn't specified on the command line
> should produce no output at all. And hence we'd need a way to tell
> whether an option was set from command line for _all_ of them. I
> don't think this would be very helpful.

I disagree here.

This is important only for cases where the hypervisor treats the
parameter as a tristate: true/false/unspecified. In all cases where
the bool value is really true or false it can be reported as such.

Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
if any other action would be derived from the parameter variable being
-1.

So either opt_ept_ad should be modified to change it to 0/1 instead of
only setting the VCMS flag, or the logic should be kept as is in this
patch. IMO changing the setting of opt_ept_ad should be done in another
patch if this is really wanted.

> 
> I'm curious if anyone else has any opinion either way (or yet
> another one) here:

Me too.


Juergen
Jan Beulich March 5, 2020, 8:26 a.m. UTC | #7
On 05.03.2020 07:01, Jürgen Groß wrote:
> On 04.03.20 17:56, Jan Beulich wrote:
>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>> +{
>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>> +
>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>> +             ",%s=%d", str, val);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void update_ept_param(void)
>>>>>>> +{
>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>
>>>>>> This won't correctly reflect reality: If you look at
>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>> to have that erratum workaround logic moved there, such that
>>>>>> you can then assme the value to be non-negative here.
>>>>>
>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>> correct thing on the current hardware.
>>>>
>>>> Well, I think the output here should represent effective settings,
>>>
>>> The minimum requirement is to reflect the effective parameters, like
>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>> we had no way of telling what was set, and this is now possible.
>>>
>>>> and a sub-item should be suppressed only if a setting has no effect
>>>> at all in the current setup, like ...
>>>>
>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>
>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>> been set nor is its value of any interest.
>>>>
>>>> ... here.
>>>
>>> I think we should not mix up specified parameters and effective
>>> settings. In case an effective setting is of common interest it should
>>> be reported via a specific node (like e.g. specific mitigation settings
>>> where the cmdline is not providing enough details).
>>
>> But then a boolean option that wasn't specified on the command line
>> should produce no output at all. And hence we'd need a way to tell
>> whether an option was set from command line for _all_ of them. I
>> don't think this would be very helpful.
> 
> I disagree here.
> 
> This is important only for cases where the hypervisor treats the
> parameter as a tristate: true/false/unspecified. In all cases where
> the bool value is really true or false it can be reported as such.

The problem I'm having with this is the resulting inconsistency:
When we write the variable with 0 or 1 in case we find it to be
-1 after command line parsing, the externally visible effect will
be different from the case where we leave it to be -1 yet still
treat it as (pseudo-)boolean. This, however, is an implementation
detail, while imo the hypfs presentation should not depend on
such implementation details.

> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
> if any other action would be derived from the parameter variable being
> -1.
> 
> So either opt_ept_ad should be modified to change it to 0/1 instead of
> only setting the VCMS flag,

That's what I did suggest.

> or the logic should be kept as is in this
> patch. IMO changing the setting of opt_ept_ad should be done in another
> patch if this is really wanted.

And of course I don't mind at all doing so in a prereq patch.
It's just that the patch here provides a good place _where_ to
actually do such an adjustment.

Jan
Jürgen Groß March 6, 2020, 6:42 a.m. UTC | #8
On 05.03.20 09:26, Jan Beulich wrote:
> On 05.03.2020 07:01, Jürgen Groß wrote:
>> On 04.03.20 17:56, Jan Beulich wrote:
>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>> +{
>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>> +
>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void update_ept_param(void)
>>>>>>>> +{
>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>
>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>> you can then assme the value to be non-negative here.
>>>>>>
>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>> correct thing on the current hardware.
>>>>>
>>>>> Well, I think the output here should represent effective settings,
>>>>
>>>> The minimum requirement is to reflect the effective parameters, like
>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>> we had no way of telling what was set, and this is now possible.
>>>>
>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>> at all in the current setup, like ...
>>>>>
>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>
>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>> been set nor is its value of any interest.
>>>>>
>>>>> ... here.
>>>>
>>>> I think we should not mix up specified parameters and effective
>>>> settings. In case an effective setting is of common interest it should
>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>> where the cmdline is not providing enough details).
>>>
>>> But then a boolean option that wasn't specified on the command line
>>> should produce no output at all. And hence we'd need a way to tell
>>> whether an option was set from command line for _all_ of them. I
>>> don't think this would be very helpful.
>>
>> I disagree here.
>>
>> This is important only for cases where the hypervisor treats the
>> parameter as a tristate: true/false/unspecified. In all cases where
>> the bool value is really true or false it can be reported as such.
> 
> The problem I'm having with this is the resulting inconsistency:
> When we write the variable with 0 or 1 in case we find it to be
> -1 after command line parsing, the externally visible effect will
> be different from the case where we leave it to be -1 yet still
> treat it as (pseudo-)boolean. This, however, is an implementation
> detail, while imo the hypfs presentation should not depend on
> such implementation details.
> 
>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>> if any other action would be derived from the parameter variable being
>> -1.
>>
>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>> only setting the VCMS flag,
> 
> That's what I did suggest.
> 
>> or the logic should be kept as is in this
>> patch. IMO changing the setting of opt_ept_ad should be done in another
>> patch if this is really wanted.
> 
> And of course I don't mind at all doing so in a prereq patch.
> It's just that the patch here provides a good place _where_ to
> actually do such an adjustment.

I was thinking of something like this:

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
      {
          rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);

+        if ( /* Work around Erratum AVR41 on Avoton processors. */
+             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
+             opt_ept_ad < 0 )
+            opt_ept_ad = 0;
          if ( !opt_ept_ad )
              _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
-        else if ( /* Work around Erratum AVR41 on Avoton processors. */
-                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 
0x4d &&
-                  opt_ept_ad < 0 )
-            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;

          /*
           * Additional sanity checking before using EPT:

Juergen
Jan Beulich March 6, 2020, 8:20 a.m. UTC | #9
On 06.03.2020 07:42, Jürgen Groß wrote:
> On 05.03.20 09:26, Jan Beulich wrote:
>> On 05.03.2020 07:01, Jürgen Groß wrote:
>>> On 04.03.20 17:56, Jan Beulich wrote:
>>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>>> +{
>>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>>> +
>>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void update_ept_param(void)
>>>>>>>>> +{
>>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>>
>>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>>> you can then assme the value to be non-negative here.
>>>>>>>
>>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>>> correct thing on the current hardware.
>>>>>>
>>>>>> Well, I think the output here should represent effective settings,
>>>>>
>>>>> The minimum requirement is to reflect the effective parameters, like
>>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>>> we had no way of telling what was set, and this is now possible.
>>>>>
>>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>>> at all in the current setup, like ...
>>>>>>
>>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>>
>>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>>> been set nor is its value of any interest.
>>>>>>
>>>>>> ... here.
>>>>>
>>>>> I think we should not mix up specified parameters and effective
>>>>> settings. In case an effective setting is of common interest it should
>>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>>> where the cmdline is not providing enough details).
>>>>
>>>> But then a boolean option that wasn't specified on the command line
>>>> should produce no output at all. And hence we'd need a way to tell
>>>> whether an option was set from command line for _all_ of them. I
>>>> don't think this would be very helpful.
>>>
>>> I disagree here.
>>>
>>> This is important only for cases where the hypervisor treats the
>>> parameter as a tristate: true/false/unspecified. In all cases where
>>> the bool value is really true or false it can be reported as such.
>>
>> The problem I'm having with this is the resulting inconsistency:
>> When we write the variable with 0 or 1 in case we find it to be
>> -1 after command line parsing, the externally visible effect will
>> be different from the case where we leave it to be -1 yet still
>> treat it as (pseudo-)boolean. This, however, is an implementation
>> detail, while imo the hypfs presentation should not depend on
>> such implementation details.
>>
>>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>>> if any other action would be derived from the parameter variable being
>>> -1.
>>>
>>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>>> only setting the VCMS flag,
>>
>> That's what I did suggest.
>>
>>> or the logic should be kept as is in this
>>> patch. IMO changing the setting of opt_ept_ad should be done in another
>>> patch if this is really wanted.
>>
>> And of course I don't mind at all doing so in a prereq patch.
>> It's just that the patch here provides a good place _where_ to
>> actually do such an adjustment.
> 
> I was thinking of something like this:
> 
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
>       {
>           rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
> 
> +        if ( /* Work around Erratum AVR41 on Avoton processors. */
> +             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
> +             opt_ept_ad < 0 )
> +            opt_ept_ad = 0;
>           if ( !opt_ept_ad )
>               _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
> -        else if ( /* Work around Erratum AVR41 on Avoton processors. */
> -                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
> -                  opt_ept_ad < 0 )
> -            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
> 
>           /*
>            * Additional sanity checking before using EPT:

And I was specifically hoping to avoid doing this in a non-__init
function.

Jan
Jürgen Groß March 6, 2020, 8:47 a.m. UTC | #10
On 06.03.20 09:20, Jan Beulich wrote:
> On 06.03.2020 07:42, Jürgen Groß wrote:
>> On 05.03.20 09:26, Jan Beulich wrote:
>>> On 05.03.2020 07:01, Jürgen Groß wrote:
>>>> On 04.03.20 17:56, Jan Beulich wrote:
>>>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>>>> +{
>>>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>>>> +
>>>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void update_ept_param(void)
>>>>>>>>>> +{
>>>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>>>
>>>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>>>> you can then assme the value to be non-negative here.
>>>>>>>>
>>>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>>>> correct thing on the current hardware.
>>>>>>>
>>>>>>> Well, I think the output here should represent effective settings,
>>>>>>
>>>>>> The minimum requirement is to reflect the effective parameters, like
>>>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>>>> we had no way of telling what was set, and this is now possible.
>>>>>>
>>>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>>>> at all in the current setup, like ...
>>>>>>>
>>>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>>>
>>>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>>>> been set nor is its value of any interest.
>>>>>>>
>>>>>>> ... here.
>>>>>>
>>>>>> I think we should not mix up specified parameters and effective
>>>>>> settings. In case an effective setting is of common interest it should
>>>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>>>> where the cmdline is not providing enough details).
>>>>>
>>>>> But then a boolean option that wasn't specified on the command line
>>>>> should produce no output at all. And hence we'd need a way to tell
>>>>> whether an option was set from command line for _all_ of them. I
>>>>> don't think this would be very helpful.
>>>>
>>>> I disagree here.
>>>>
>>>> This is important only for cases where the hypervisor treats the
>>>> parameter as a tristate: true/false/unspecified. In all cases where
>>>> the bool value is really true or false it can be reported as such.
>>>
>>> The problem I'm having with this is the resulting inconsistency:
>>> When we write the variable with 0 or 1 in case we find it to be
>>> -1 after command line parsing, the externally visible effect will
>>> be different from the case where we leave it to be -1 yet still
>>> treat it as (pseudo-)boolean. This, however, is an implementation
>>> detail, while imo the hypfs presentation should not depend on
>>> such implementation details.
>>>
>>>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>>>> if any other action would be derived from the parameter variable being
>>>> -1.
>>>>
>>>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>>>> only setting the VCMS flag,
>>>
>>> That's what I did suggest.
>>>
>>>> or the logic should be kept as is in this
>>>> patch. IMO changing the setting of opt_ept_ad should be done in another
>>>> patch if this is really wanted.
>>>
>>> And of course I don't mind at all doing so in a prereq patch.
>>> It's just that the patch here provides a good place _where_ to
>>> actually do such an adjustment.
>>
>> I was thinking of something like this:
>>
>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>> @@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
>>        {
>>            rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
>>
>> +        if ( /* Work around Erratum AVR41 on Avoton processors. */
>> +             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>> +             opt_ept_ad < 0 )
>> +            opt_ept_ad = 0;
>>            if ( !opt_ept_ad )
>>                _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>> -        else if ( /* Work around Erratum AVR41 on Avoton processors. */
>> -                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>> -                  opt_ept_ad < 0 )
>> -            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>
>>            /*
>>             * Additional sanity checking before using EPT:
> 
> And I was specifically hoping to avoid doing this in a non-__init
> function.

Should be fairly easy (see attached patch).


Juergen
Jan Beulich March 6, 2020, 9:04 a.m. UTC | #11
On 06.03.2020 09:47, Jürgen Groß wrote:
> On 06.03.20 09:20, Jan Beulich wrote:
>> On 06.03.2020 07:42, Jürgen Groß wrote:
>>> On 05.03.20 09:26, Jan Beulich wrote:
>>>> On 05.03.2020 07:01, Jürgen Groß wrote:
>>>>> On 04.03.20 17:56, Jan Beulich wrote:
>>>>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>>>>> +{
>>>>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>>>>> +
>>>>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static void update_ept_param(void)
>>>>>>>>>>> +{
>>>>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>>>>
>>>>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>>>>> you can then assme the value to be non-negative here.
>>>>>>>>>
>>>>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>>>>> correct thing on the current hardware.
>>>>>>>>
>>>>>>>> Well, I think the output here should represent effective settings,
>>>>>>>
>>>>>>> The minimum requirement is to reflect the effective parameters, like
>>>>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>>>>> we had no way of telling what was set, and this is now possible.
>>>>>>>
>>>>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>>>>> at all in the current setup, like ...
>>>>>>>>
>>>>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>>>>
>>>>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>>>>> been set nor is its value of any interest.
>>>>>>>>
>>>>>>>> ... here.
>>>>>>>
>>>>>>> I think we should not mix up specified parameters and effective
>>>>>>> settings. In case an effective setting is of common interest it should
>>>>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>>>>> where the cmdline is not providing enough details).
>>>>>>
>>>>>> But then a boolean option that wasn't specified on the command line
>>>>>> should produce no output at all. And hence we'd need a way to tell
>>>>>> whether an option was set from command line for _all_ of them. I
>>>>>> don't think this would be very helpful.
>>>>>
>>>>> I disagree here.
>>>>>
>>>>> This is important only for cases where the hypervisor treats the
>>>>> parameter as a tristate: true/false/unspecified. In all cases where
>>>>> the bool value is really true or false it can be reported as such.
>>>>
>>>> The problem I'm having with this is the resulting inconsistency:
>>>> When we write the variable with 0 or 1 in case we find it to be
>>>> -1 after command line parsing, the externally visible effect will
>>>> be different from the case where we leave it to be -1 yet still
>>>> treat it as (pseudo-)boolean. This, however, is an implementation
>>>> detail, while imo the hypfs presentation should not depend on
>>>> such implementation details.
>>>>
>>>>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>>>>> if any other action would be derived from the parameter variable being
>>>>> -1.
>>>>>
>>>>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>>>>> only setting the VCMS flag,
>>>>
>>>> That's what I did suggest.
>>>>
>>>>> or the logic should be kept as is in this
>>>>> patch. IMO changing the setting of opt_ept_ad should be done in another
>>>>> patch if this is really wanted.
>>>>
>>>> And of course I don't mind at all doing so in a prereq patch.
>>>> It's just that the patch here provides a good place _where_ to
>>>> actually do such an adjustment.
>>>
>>> I was thinking of something like this:
>>>
>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>> @@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
>>>        {
>>>            rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
>>>
>>> +        if ( /* Work around Erratum AVR41 on Avoton processors. */
>>> +             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>> +             opt_ept_ad < 0 )
>>> +            opt_ept_ad = 0;
>>>            if ( !opt_ept_ad )
>>>                _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>> -        else if ( /* Work around Erratum AVR41 on Avoton processors. */
>>> -                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>> -                  opt_ept_ad < 0 )
>>> -            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>
>>>            /*
>>>             * Additional sanity checking before using EPT:
>>
>> And I was specifically hoping to avoid doing this in a non-__init
>> function.
> 
> Should be fairly easy (see attached patch).

Why not put the opt_ept_ad adjustment right into start_vmx(),
just like e.g. the opt_ept_exec_sp gets also done there? Pulling
the setting up of the 'v' key handler risks installing it ahead
of a potential future later error exit from start_vmx(). But I'm
not entirely opposed to the chosen approach either - it'll be up
to Kevin to judge, I guess.

Jan
Jürgen Groß March 6, 2020, 9:20 a.m. UTC | #12
On 06.03.20 10:04, Jan Beulich wrote:
> On 06.03.2020 09:47, Jürgen Groß wrote:
>> On 06.03.20 09:20, Jan Beulich wrote:
>>> On 06.03.2020 07:42, Jürgen Groß wrote:
>>>> On 05.03.20 09:26, Jan Beulich wrote:
>>>>> On 05.03.2020 07:01, Jürgen Groß wrote:
>>>>>> On 04.03.20 17:56, Jan Beulich wrote:
>>>>>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>>>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static void update_ept_param(void)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>>>>>
>>>>>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>>>>>> you can then assme the value to be non-negative here.
>>>>>>>>>>
>>>>>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>>>>>> correct thing on the current hardware.
>>>>>>>>>
>>>>>>>>> Well, I think the output here should represent effective settings,
>>>>>>>>
>>>>>>>> The minimum requirement is to reflect the effective parameters, like
>>>>>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>>>>>> we had no way of telling what was set, and this is now possible.
>>>>>>>>
>>>>>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>>>>>> at all in the current setup, like ...
>>>>>>>>>
>>>>>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>>>>>
>>>>>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>>>>>> been set nor is its value of any interest.
>>>>>>>>>
>>>>>>>>> ... here.
>>>>>>>>
>>>>>>>> I think we should not mix up specified parameters and effective
>>>>>>>> settings. In case an effective setting is of common interest it should
>>>>>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>>>>>> where the cmdline is not providing enough details).
>>>>>>>
>>>>>>> But then a boolean option that wasn't specified on the command line
>>>>>>> should produce no output at all. And hence we'd need a way to tell
>>>>>>> whether an option was set from command line for _all_ of them. I
>>>>>>> don't think this would be very helpful.
>>>>>>
>>>>>> I disagree here.
>>>>>>
>>>>>> This is important only for cases where the hypervisor treats the
>>>>>> parameter as a tristate: true/false/unspecified. In all cases where
>>>>>> the bool value is really true or false it can be reported as such.
>>>>>
>>>>> The problem I'm having with this is the resulting inconsistency:
>>>>> When we write the variable with 0 or 1 in case we find it to be
>>>>> -1 after command line parsing, the externally visible effect will
>>>>> be different from the case where we leave it to be -1 yet still
>>>>> treat it as (pseudo-)boolean. This, however, is an implementation
>>>>> detail, while imo the hypfs presentation should not depend on
>>>>> such implementation details.
>>>>>
>>>>>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>>>>>> if any other action would be derived from the parameter variable being
>>>>>> -1.
>>>>>>
>>>>>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>>>>>> only setting the VCMS flag,
>>>>>
>>>>> That's what I did suggest.
>>>>>
>>>>>> or the logic should be kept as is in this
>>>>>> patch. IMO changing the setting of opt_ept_ad should be done in another
>>>>>> patch if this is really wanted.
>>>>>
>>>>> And of course I don't mind at all doing so in a prereq patch.
>>>>> It's just that the patch here provides a good place _where_ to
>>>>> actually do such an adjustment.
>>>>
>>>> I was thinking of something like this:
>>>>
>>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>>> @@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
>>>>         {
>>>>             rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
>>>>
>>>> +        if ( /* Work around Erratum AVR41 on Avoton processors. */
>>>> +             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>>> +             opt_ept_ad < 0 )
>>>> +            opt_ept_ad = 0;
>>>>             if ( !opt_ept_ad )
>>>>                 _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>> -        else if ( /* Work around Erratum AVR41 on Avoton processors. */
>>>> -                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>>> -                  opt_ept_ad < 0 )
>>>> -            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>>
>>>>             /*
>>>>              * Additional sanity checking before using EPT:
>>>
>>> And I was specifically hoping to avoid doing this in a non-__init
>>> function.
>>
>> Should be fairly easy (see attached patch).
> 
> Why not put the opt_ept_ad adjustment right into start_vmx(),
> just like e.g. the opt_ept_exec_sp gets also done there? Pulling
> the setting up of the 'v' key handler risks installing it ahead
> of a potential future later error exit from start_vmx(). But I'm

Is this really problematic?

How probable is it to have a HVM domain running when an early error
exit from start_vmx() happened (hint: hvm_enabled will be 0 in this
case) ?

> not entirely opposed to the chosen approach either - it'll be up
> to Kevin to judge, I guess.


Juergen
Jan Beulich March 6, 2020, 9:22 a.m. UTC | #13
On 06.03.2020 10:20, Jürgen Groß wrote:
> On 06.03.20 10:04, Jan Beulich wrote:
>> On 06.03.2020 09:47, Jürgen Groß wrote:
>>> On 06.03.20 09:20, Jan Beulich wrote:
>>>> On 06.03.2020 07:42, Jürgen Groß wrote:
>>>>> On 05.03.20 09:26, Jan Beulich wrote:
>>>>>> On 05.03.2020 07:01, Jürgen Groß wrote:
>>>>>>> On 04.03.20 17:56, Jan Beulich wrote:
>>>>>>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>>>>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>>>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +static void update_ept_param(void)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>>>>>>
>>>>>>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>>>>>>> you can then assme the value to be non-negative here.
>>>>>>>>>>>
>>>>>>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>>>>>>> correct thing on the current hardware.
>>>>>>>>>>
>>>>>>>>>> Well, I think the output here should represent effective settings,
>>>>>>>>>
>>>>>>>>> The minimum requirement is to reflect the effective parameters, like
>>>>>>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>>>>>>> we had no way of telling what was set, and this is now possible.
>>>>>>>>>
>>>>>>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>>>>>>> at all in the current setup, like ...
>>>>>>>>>>
>>>>>>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>>>>>>
>>>>>>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>>>>>>> been set nor is its value of any interest.
>>>>>>>>>>
>>>>>>>>>> ... here.
>>>>>>>>>
>>>>>>>>> I think we should not mix up specified parameters and effective
>>>>>>>>> settings. In case an effective setting is of common interest it should
>>>>>>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>>>>>>> where the cmdline is not providing enough details).
>>>>>>>>
>>>>>>>> But then a boolean option that wasn't specified on the command line
>>>>>>>> should produce no output at all. And hence we'd need a way to tell
>>>>>>>> whether an option was set from command line for _all_ of them. I
>>>>>>>> don't think this would be very helpful.
>>>>>>>
>>>>>>> I disagree here.
>>>>>>>
>>>>>>> This is important only for cases where the hypervisor treats the
>>>>>>> parameter as a tristate: true/false/unspecified. In all cases where
>>>>>>> the bool value is really true or false it can be reported as such.
>>>>>>
>>>>>> The problem I'm having with this is the resulting inconsistency:
>>>>>> When we write the variable with 0 or 1 in case we find it to be
>>>>>> -1 after command line parsing, the externally visible effect will
>>>>>> be different from the case where we leave it to be -1 yet still
>>>>>> treat it as (pseudo-)boolean. This, however, is an implementation
>>>>>> detail, while imo the hypfs presentation should not depend on
>>>>>> such implementation details.
>>>>>>
>>>>>>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>>>>>>> if any other action would be derived from the parameter variable being
>>>>>>> -1.
>>>>>>>
>>>>>>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>>>>>>> only setting the VCMS flag,
>>>>>>
>>>>>> That's what I did suggest.
>>>>>>
>>>>>>> or the logic should be kept as is in this
>>>>>>> patch. IMO changing the setting of opt_ept_ad should be done in another
>>>>>>> patch if this is really wanted.
>>>>>>
>>>>>> And of course I don't mind at all doing so in a prereq patch.
>>>>>> It's just that the patch here provides a good place _where_ to
>>>>>> actually do such an adjustment.
>>>>>
>>>>> I was thinking of something like this:
>>>>>
>>>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>>>> @@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
>>>>>         {
>>>>>             rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
>>>>>
>>>>> +        if ( /* Work around Erratum AVR41 on Avoton processors. */
>>>>> +             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>>>> +             opt_ept_ad < 0 )
>>>>> +            opt_ept_ad = 0;
>>>>>             if ( !opt_ept_ad )
>>>>>                 _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>>> -        else if ( /* Work around Erratum AVR41 on Avoton processors. */
>>>>> -                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>>>> -                  opt_ept_ad < 0 )
>>>>> -            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>>>
>>>>>             /*
>>>>>              * Additional sanity checking before using EPT:
>>>>
>>>> And I was specifically hoping to avoid doing this in a non-__init
>>>> function.
>>>
>>> Should be fairly easy (see attached patch).
>>
>> Why not put the opt_ept_ad adjustment right into start_vmx(),
>> just like e.g. the opt_ept_exec_sp gets also done there? Pulling
>> the setting up of the 'v' key handler risks installing it ahead
>> of a potential future later error exit from start_vmx(). But I'm
> 
> Is this really problematic?

Not _really_, but still. In particular I'd prefer the 'v' key to
not even be listed among 'h' key output in such a case.

Jan
Jürgen Groß March 6, 2020, 9:27 a.m. UTC | #14
On 06.03.20 10:22, Jan Beulich wrote:
> On 06.03.2020 10:20, Jürgen Groß wrote:
>> On 06.03.20 10:04, Jan Beulich wrote:
>>> On 06.03.2020 09:47, Jürgen Groß wrote:
>>>> On 06.03.20 09:20, Jan Beulich wrote:
>>>>> On 06.03.2020 07:42, Jürgen Groß wrote:
>>>>>> On 05.03.20 09:26, Jan Beulich wrote:
>>>>>>> On 05.03.2020 07:01, Jürgen Groß wrote:
>>>>>>>> On 04.03.20 17:56, Jan Beulich wrote:
>>>>>>>>> On 04.03.2020 17:31, Jürgen Groß wrote:
>>>>>>>>>> On 04.03.20 16:19, Jan Beulich wrote:
>>>>>>>>>>> On 04.03.2020 16:07, Jürgen Groß wrote:
>>>>>>>>>>>> On 04.03.20 12:32, Jan Beulich wrote:
>>>>>>>>>>>>> On 26.02.2020 13:47, Juergen Gross wrote:
>>>>>>>>>>>>>> +static void update_ept_param_append(const char *str, int val)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    char *pos = opt_ept_setting + strlen(opt_ept_setting);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
>>>>>>>>>>>>>> +             ",%s=%d", str, val);
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +static void update_ept_param(void)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
>>>>>>>>>>>>>> +    if ( opt_ept_ad >= 0 )
>>>>>>>>>>>>>> +        update_ept_param_append("ad", opt_ept_ad);
>>>>>>>>>>>>>
>>>>>>>>>>>>> This won't correctly reflect reality: If you look at
>>>>>>>>>>>>> vmx_init_vmcs_config(), even a negative value means "true" here,
>>>>>>>>>>>>> unless on a specific Atom model. I think init_ept_param() wants
>>>>>>>>>>>>> to have that erratum workaround logic moved there, such that
>>>>>>>>>>>>> you can then assme the value to be non-negative here.
>>>>>>>>>>>>
>>>>>>>>>>>> But isn't not mentioning it in the -1 case correct? -1 means: do the
>>>>>>>>>>>> correct thing on the current hardware.
>>>>>>>>>>>
>>>>>>>>>>> Well, I think the output here should represent effective settings,
>>>>>>>>>>
>>>>>>>>>> The minimum requirement is to reflect the effective parameters, like
>>>>>>>>>> cmdline is doing for boot-time only parameters. With runtime parameters
>>>>>>>>>> we had no way of telling what was set, and this is now possible.
>>>>>>>>>>
>>>>>>>>>>> and a sub-item should be suppressed only if a setting has no effect
>>>>>>>>>>> at all in the current setup, like ...
>>>>>>>>>>>
>>>>>>>>>>>>>> +    if ( opt_ept_exec_sp >= 0 )
>>>>>>>>>>>>>> +        update_ept_param_append("exec-sp", opt_ept_exec_sp);
>>>>>>>>>>>>>
>>>>>>>>>>>>> I agree for this one - if the value is still -1, it has neither
>>>>>>>>>>>>> been set nor is its value of any interest.
>>>>>>>>>>>
>>>>>>>>>>> ... here.
>>>>>>>>>>
>>>>>>>>>> I think we should not mix up specified parameters and effective
>>>>>>>>>> settings. In case an effective setting is of common interest it should
>>>>>>>>>> be reported via a specific node (like e.g. specific mitigation settings
>>>>>>>>>> where the cmdline is not providing enough details).
>>>>>>>>>
>>>>>>>>> But then a boolean option that wasn't specified on the command line
>>>>>>>>> should produce no output at all. And hence we'd need a way to tell
>>>>>>>>> whether an option was set from command line for _all_ of them. I
>>>>>>>>> don't think this would be very helpful.
>>>>>>>>
>>>>>>>> I disagree here.
>>>>>>>>
>>>>>>>> This is important only for cases where the hypervisor treats the
>>>>>>>> parameter as a tristate: true/false/unspecified. In all cases where
>>>>>>>> the bool value is really true or false it can be reported as such.
>>>>>>>
>>>>>>> The problem I'm having with this is the resulting inconsistency:
>>>>>>> When we write the variable with 0 or 1 in case we find it to be
>>>>>>> -1 after command line parsing, the externally visible effect will
>>>>>>> be different from the case where we leave it to be -1 yet still
>>>>>>> treat it as (pseudo-)boolean. This, however, is an implementation
>>>>>>> detail, while imo the hypfs presentation should not depend on
>>>>>>> such implementation details.
>>>>>>>
>>>>>>>> Reporting 0/1 for e.g. "ad" if opt_ept_ad==-1 would add a latent problem
>>>>>>>> if any other action would be derived from the parameter variable being
>>>>>>>> -1.
>>>>>>>>
>>>>>>>> So either opt_ept_ad should be modified to change it to 0/1 instead of
>>>>>>>> only setting the VCMS flag,
>>>>>>>
>>>>>>> That's what I did suggest.
>>>>>>>
>>>>>>>> or the logic should be kept as is in this
>>>>>>>> patch. IMO changing the setting of opt_ept_ad should be done in another
>>>>>>>> patch if this is really wanted.
>>>>>>>
>>>>>>> And of course I don't mind at all doing so in a prereq patch.
>>>>>>> It's just that the patch here provides a good place _where_ to
>>>>>>> actually do such an adjustment.
>>>>>>
>>>>>> I was thinking of something like this:
>>>>>>
>>>>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>>>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>>>>> @@ -313,12 +313,12 @@ static int vmx_init_vmcs_config(void)
>>>>>>          {
>>>>>>              rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
>>>>>>
>>>>>> +        if ( /* Work around Erratum AVR41 on Avoton processors. */
>>>>>> +             boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>>>>> +             opt_ept_ad < 0 )
>>>>>> +            opt_ept_ad = 0;
>>>>>>              if ( !opt_ept_ad )
>>>>>>                  _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>>>> -        else if ( /* Work around Erratum AVR41 on Avoton processors. */
>>>>>> -                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
>>>>>> -                  opt_ept_ad < 0 )
>>>>>> -            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
>>>>>>
>>>>>>              /*
>>>>>>               * Additional sanity checking before using EPT:
>>>>>
>>>>> And I was specifically hoping to avoid doing this in a non-__init
>>>>> function.
>>>>
>>>> Should be fairly easy (see attached patch).
>>>
>>> Why not put the opt_ept_ad adjustment right into start_vmx(),
>>> just like e.g. the opt_ept_exec_sp gets also done there? Pulling
>>> the setting up of the 'v' key handler risks installing it ahead
>>> of a potential future later error exit from start_vmx(). But I'm
>>
>> Is this really problematic?
> 
> Not _really_, but still. In particular I'd prefer the 'v' key to
> not even be listed among 'h' key output in such a case.

Now this is an optimization for a supposedly never to happen error
case only theoretically possible with future code additions.

In order to prepare for this case I don't think we should export
opt_ept_ad and put the setting of it at the very first thing to do
in start_vmx().


Juergen
Julien Grall March 23, 2020, 10:38 a.m. UTC | #15
Hi Juergen & Jan,

On 26/02/2020 12:47, Juergen Gross wrote:
> diff --git a/docs/misc/hypfs-paths.pandoc b/docs/misc/hypfs-paths.pandoc
> index 1faebcccbc..b4a5b6086e 100644
> --- a/docs/misc/hypfs-paths.pandoc
> +++ b/docs/misc/hypfs-paths.pandoc
> @@ -152,3 +152,12 @@ The major version of Xen.
>   #### /buildinfo/version/minor = INTEGER
>   
>   The minor version of Xen.
> +
> +#### /params/
> +
> +A directory of runtime parameters.
> +
> +#### /params/*
> +
> +The individual parameters. The description of the different parameters can be
> +found in `docs/misc/xen-command-line.pandoc`.
> diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S
> index a497f6a48d..0061a8dfea 100644
> --- a/xen/arch/arm/xen.lds.S
> +++ b/xen/arch/arm/xen.lds.S
> @@ -89,6 +89,11 @@ SECTIONS
>          __start_schedulers_array = .;
>          *(.data.schedulers)
>          __end_schedulers_array = .;
> +
> +       . = ALIGN(8);

Apologies for the late answer. I noticed that Jan asked the following 
question on v5:

"Do you really need 8-byte alignment even on 32-bit Arm?"

We forbid unaligned access on 32-bit Arm (and unaligned access should be 
avoided on 64-bit), so if the structure contains a 64-bit type, then we 
definitely need the data to be 8-byte aligned.

What is the expected alignment of the structure?

Cheers,
diff mbox series

Patch

diff --git a/docs/misc/hypfs-paths.pandoc b/docs/misc/hypfs-paths.pandoc
index 1faebcccbc..b4a5b6086e 100644
--- a/docs/misc/hypfs-paths.pandoc
+++ b/docs/misc/hypfs-paths.pandoc
@@ -152,3 +152,12 @@  The major version of Xen.
 #### /buildinfo/version/minor = INTEGER
 
 The minor version of Xen.
+
+#### /params/
+
+A directory of runtime parameters.
+
+#### /params/*
+
+The individual parameters. The description of the different parameters can be
+found in `docs/misc/xen-command-line.pandoc`.
diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S
index a497f6a48d..0061a8dfea 100644
--- a/xen/arch/arm/xen.lds.S
+++ b/xen/arch/arm/xen.lds.S
@@ -89,6 +89,11 @@  SECTIONS
        __start_schedulers_array = .;
        *(.data.schedulers)
        __end_schedulers_array = .;
+
+       . = ALIGN(8);
+       __paramhypfs_start = .;
+       *(.data.paramhypfs)
+       __paramhypfs_end = .;
        *(.data.rel)
        *(.data.rel.*)
        CONSTRUCTORS
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 65445afeb0..3b690b05ed 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -70,6 +70,30 @@  integer_param("ple_window", ple_window);
 static bool __read_mostly opt_ept_pml = true;
 static s8 __read_mostly opt_ept_ad = -1;
 int8_t __read_mostly opt_ept_exec_sp = -1;
+static char opt_ept_setting[16];
+
+static void update_ept_param_append(const char *str, int val)
+{
+    char *pos = opt_ept_setting + strlen(opt_ept_setting);
+
+    snprintf(pos, sizeof(opt_ept_setting) - (pos - opt_ept_setting),
+             ",%s=%d", str, val);
+}
+
+static void update_ept_param(void)
+{
+    snprintf(opt_ept_setting, sizeof(opt_ept_setting), "pml=%d", opt_ept_pml);
+    if ( opt_ept_ad >= 0 )
+        update_ept_param_append("ad", opt_ept_ad);
+    if ( opt_ept_exec_sp >= 0 )
+        update_ept_param_append("exec-sp", opt_ept_exec_sp);
+}
+
+static void __init init_ept_param(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, opt_ept_setting);
+    update_ept_param();
+}
 
 static int __init parse_ept_param(const char *s)
 {
@@ -93,6 +117,8 @@  static int __init parse_ept_param(const char *s)
         s = ss + 1;
     } while ( *ss );
 
+    update_ept_param();
+
     return rc;
 }
 custom_param("ept", parse_ept_param);
@@ -115,6 +141,8 @@  static int parse_ept_param_runtime(const char *s)
 
     opt_ept_exec_sp = val;
 
+    update_ept_param();
+
     rcu_read_lock(&domlist_read_lock);
     for_each_domain ( d )
     {
@@ -144,7 +172,7 @@  static int parse_ept_param_runtime(const char *s)
 
     return 0;
 }
-custom_runtime_only_param("ept", parse_ept_param_runtime);
+custom_runtime_only_param("ept", parse_ept_param_runtime, init_ept_param);
 
 /* Dynamic (run-time adjusted) execution control flags. */
 u32 vmx_pin_based_exec_control __read_mostly;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 0b37653b12..96fae68409 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -20,8 +20,27 @@  static __read_mostly enum {
     PCID_OFF,
     PCID_ALL,
     PCID_XPTI,
-    PCID_NOXPTI
+    PCID_NOXPTI,
+    PCID_END
 } opt_pcid = PCID_XPTI;
+static const char *opt_pcid_2_string[PCID_END] = {
+    [PCID_OFF] = "off",
+    [PCID_ALL] = "on",
+    [PCID_XPTI] = "xpti",
+    [PCID_NOXPTI] = "noxpti"
+};
+static char opt_pcid_val[7];
+
+static void update_opt_pcid(void)
+{
+    strlcpy(opt_pcid_val, opt_pcid_2_string[opt_pcid], sizeof(opt_pcid_val));
+}
+
+static void __init opt_pcid_init(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, opt_pcid_val);
+    update_opt_pcid();
+}
 
 static int parse_pcid(const char *s)
 {
@@ -55,9 +74,12 @@  static int parse_pcid(const char *s)
         break;
     }
 
+    if ( !rc )
+        update_opt_pcid();
+
     return rc;
 }
-custom_runtime_param("pcid", parse_pcid);
+custom_runtime_param("pcid", parse_pcid, opt_pcid_init);
 
 static void noreturn continue_nonidle_domain(struct vcpu *v)
 {
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 7f9459d683..21a37f0f57 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -279,6 +279,11 @@  SECTIONS
        __start_schedulers_array = .;
        *(.data.schedulers)
        __end_schedulers_array = .;
+
+       . = ALIGN(8);
+       __paramhypfs_start = .;
+       *(.data.paramhypfs)
+       __paramhypfs_end = .;
   } :text
 
   DECL_SECTION(.data) {
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index bc37acae0e..2b7b3e2844 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -85,8 +85,10 @@  struct grant_table {
     struct grant_table_arch arch;
 };
 
-static int parse_gnttab_limit(const char *param, const char *arg,
-                              unsigned int *valp)
+#define GRANT_CUSTOM_VAL_SZ  12
+
+static int parse_gnttab_limit(const char *arg, unsigned int *valp,
+                              char *parval)
 {
     const char *e;
     unsigned long val;
@@ -99,28 +101,47 @@  static int parse_gnttab_limit(const char *param, const char *arg,
         return -ERANGE;
 
     *valp = val;
+    snprintf(parval, GRANT_CUSTOM_VAL_SZ, "%lu", val);
 
     return 0;
 }
 
 unsigned int __read_mostly opt_max_grant_frames = 64;
+static char __read_mostly opt_max_grant_frames_val[GRANT_CUSTOM_VAL_SZ];
+
+static void __init gnttab_max_frames_init(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, opt_max_grant_frames_val);
+    snprintf(opt_max_grant_frames_val, GRANT_CUSTOM_VAL_SZ, "%u",
+             opt_max_grant_frames);
+}
 
 static int parse_gnttab_max_frames(const char *arg)
 {
-    return parse_gnttab_limit("gnttab_max_frames", arg,
-                              &opt_max_grant_frames);
+    return parse_gnttab_limit(arg, &opt_max_grant_frames,
+                              opt_max_grant_frames_val);
 }
-custom_runtime_param("gnttab_max_frames", parse_gnttab_max_frames);
+custom_runtime_param("gnttab_max_frames", parse_gnttab_max_frames,
+                     gnttab_max_frames_init);
 
 static unsigned int __read_mostly opt_max_maptrack_frames = 1024;
+static char __read_mostly opt_max_maptrack_frames_val[GRANT_CUSTOM_VAL_SZ];
+
+static void __init max_maptrack_frames_init(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, opt_max_maptrack_frames_val);
+    snprintf(opt_max_maptrack_frames_val, GRANT_CUSTOM_VAL_SZ, "%u",
+             opt_max_maptrack_frames);
+}
 
 static int parse_gnttab_max_maptrack_frames(const char *arg)
 {
-    return parse_gnttab_limit("gnttab_max_maptrack_frames", arg,
-                              &opt_max_maptrack_frames);
+    return parse_gnttab_limit(arg, &opt_max_maptrack_frames,
+                              opt_max_maptrack_frames_val);
 }
 custom_runtime_param("gnttab_max_maptrack_frames",
-                     parse_gnttab_max_maptrack_frames);
+                     parse_gnttab_max_maptrack_frames,
+                     max_maptrack_frames_init);
 
 #ifndef GNTTAB_MAX_VERSION
 #define GNTTAB_MAX_VERSION 2
diff --git a/xen/common/hypfs.c b/xen/common/hypfs.c
index e6166fe1e7..9503ef0731 100644
--- a/xen/common/hypfs.c
+++ b/xen/common/hypfs.c
@@ -10,6 +10,7 @@ 
 #include <xen/hypercall.h>
 #include <xen/hypfs.h>
 #include <xen/lib.h>
+#include <xen/param.h>
 #include <xen/rwlock.h>
 #include <public/hypfs.h>
 
@@ -281,6 +282,33 @@  int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
     return 0;
 }
 
+int hypfs_write_custom(struct hypfs_entry_leaf *leaf,
+                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
+{
+    struct param_hypfs *p;
+    char *buf;
+    int ret;
+
+    buf = xzalloc_array(char, ulen);
+    if ( !buf )
+        return -ENOMEM;
+
+    ret = -EFAULT;
+    if ( copy_from_guest(buf, uaddr, ulen) )
+        goto out;
+
+    ret = -EDOM;
+    if ( !memchr(buf, 0, ulen) )
+        goto out;
+
+    p = container_of(leaf, struct param_hypfs, hypfs);
+    ret = p->param->par.func(buf);
+
+ out:
+    xfree(buf);
+    return ret;
+}
+
 static int hypfs_write(struct hypfs_entry *entry,
                        XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
 {
@@ -347,3 +375,13 @@  long do_hypfs_op(unsigned int cmd,
 
     return ret;
 }
+
+void hypfs_write_lock(void)
+{
+    write_lock(&hypfs_lock);
+}
+
+void hypfs_write_unlock(void)
+{
+    write_unlock(&hypfs_lock);
+}
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 4b7bc28afb..7516242337 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -198,7 +198,13 @@  static void __init _cmdline_parse(const char *cmdline)
 
 int runtime_parse(const char *line)
 {
-    return parse_params(line, __param_start, __param_end);
+    int ret;
+
+    hypfs_write_lock();
+    ret = parse_params(line, __param_start, __param_end);
+    hypfs_write_unlock();
+
+    return ret;
 }
 
 /**
@@ -433,6 +439,25 @@  static int __init buildinfo_init(void)
 }
 __initcall(buildinfo_init);
 
+static HYPFS_DIR_INIT(params, "params");
+
+static int __init param_init(void)
+{
+    struct param_hypfs *param;
+
+    hypfs_add_dir(&hypfs_root, &params, true);
+
+    for ( param = __paramhypfs_start; param < __paramhypfs_end; param++ )
+    {
+        if ( param->init_leaf )
+            param->init_leaf(param);
+        hypfs_add_leaf(&params, &param->hypfs, true);
+    }
+
+    return 0;
+}
+__initcall(param_init);
+
 # define DO(fn) long do_##fn
 
 #endif
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 913ae1b66a..000332eb2a 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -75,12 +75,36 @@  enum con_timestamp_mode
     TSM_DATE_MS,       /* [YYYY-MM-DD HH:MM:SS.mmm] */
     TSM_BOOT,          /* [SSSSSS.uuuuuu] */
     TSM_RAW,           /* [XXXXXXXXXXXXXXXX] */
+    TSM_END
+};
+
+static const char *con_timestamp_mode_2_string[TSM_END] = {
+    [TSM_NONE] = "none",
+    [TSM_DATE] = "date",
+    [TSM_DATE_MS] = "datems",
+    [TSM_BOOT] = "boot",
+    [TSM_RAW] = "raw"
 };
 
 static enum con_timestamp_mode __read_mostly opt_con_timestamp_mode = TSM_NONE;
+static char con_timestamp_mode_val[7];
+
+static void update_con_timestamp_mode(void)
+{
+    strlcpy(con_timestamp_mode_val,
+            con_timestamp_mode_2_string[opt_con_timestamp_mode],
+            sizeof(con_timestamp_mode_val));
+}
+
+static void __init con_timestamp_mode_init(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, con_timestamp_mode_val);
+    update_con_timestamp_mode();
+}
 
 static int parse_console_timestamps(const char *s);
-custom_runtime_param("console_timestamps", parse_console_timestamps);
+custom_runtime_param("console_timestamps", parse_console_timestamps,
+                     con_timestamp_mode_init);
 
 /* conring_size: allows a large console ring than default (16kB). */
 static uint32_t __initdata opt_conring_size;
@@ -133,16 +157,39 @@  static DEFINE_SPINLOCK(console_lock);
 #define XENLOG_DEFAULT       1 /* XENLOG_WARNING */
 #define XENLOG_GUEST_DEFAULT 1 /* XENLOG_WARNING */
 
+#define LOGLVL_VAL_SZ 16
 static int __read_mostly xenlog_upper_thresh = XENLOG_UPPER_THRESHOLD;
 static int __read_mostly xenlog_lower_thresh = XENLOG_LOWER_THRESHOLD;
+static char xenlog_val[LOGLVL_VAL_SZ];
 static int __read_mostly xenlog_guest_upper_thresh =
     XENLOG_GUEST_UPPER_THRESHOLD;
 static int __read_mostly xenlog_guest_lower_thresh =
     XENLOG_GUEST_LOWER_THRESHOLD;
+static char xenlog_guest_val[LOGLVL_VAL_SZ];
 
 static int parse_loglvl(const char *s);
 static int parse_guest_loglvl(const char *s);
 
+static char *lvl2opt[] = { "none", "error", "warning", "info", "all" };
+
+static void xenlog_update_val(int lower, int upper, char *val)
+{
+    snprintf(val, LOGLVL_VAL_SZ, "%s/%s", lvl2opt[lower], lvl2opt[upper]);
+}
+
+static void __init xenlog_init(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, xenlog_val);
+    xenlog_update_val(xenlog_lower_thresh, xenlog_upper_thresh, xenlog_val);
+}
+
+static void __init xenlog_guest_init(struct param_hypfs *par)
+{
+    custom_runtime_set_var(par, xenlog_guest_val);
+    xenlog_update_val(xenlog_guest_lower_thresh, xenlog_guest_upper_thresh,
+                      xenlog_guest_val);
+}
+
 /*
  * <lvl> := none|error|warning|info|debug|all
  * loglvl=<lvl_print_always>[/<lvl_print_ratelimit>]
@@ -151,8 +198,8 @@  static int parse_guest_loglvl(const char *s);
  * Similar definitions for guest_loglvl, but applies to guest tracing.
  * Defaults: loglvl=warning ; guest_loglvl=none/warning
  */
-custom_runtime_param("loglvl", parse_loglvl);
-custom_runtime_param("guest_loglvl", parse_guest_loglvl);
+custom_runtime_param("loglvl", parse_loglvl, xenlog_init);
+custom_runtime_param("guest_loglvl", parse_guest_loglvl, xenlog_guest_init);
 
 static atomic_t print_everything = ATOMIC_INIT(0);
 
@@ -173,7 +220,7 @@  static int __parse_loglvl(const char *s, const char **ps)
     return 2; /* sane fallback */
 }
 
-static int _parse_loglvl(const char *s, int *lower, int *upper)
+static int _parse_loglvl(const char *s, int *lower, int *upper, char *val)
 {
     *lower = *upper = __parse_loglvl(s, &s);
     if ( *s == '/' )
@@ -181,18 +228,21 @@  static int _parse_loglvl(const char *s, int *lower, int *upper)
     if ( *upper < *lower )
         *upper = *lower;
 
+    xenlog_update_val(*lower, *upper, val);
+
     return *s ? -EINVAL : 0;
 }
 
 static int parse_loglvl(const char *s)
 {
-    return _parse_loglvl(s, &xenlog_lower_thresh, &xenlog_upper_thresh);
+    return _parse_loglvl(s, &xenlog_lower_thresh, &xenlog_upper_thresh,
+                         xenlog_val);
 }
 
 static int parse_guest_loglvl(const char *s)
 {
     return _parse_loglvl(s, &xenlog_guest_lower_thresh,
-                         &xenlog_guest_upper_thresh);
+                         &xenlog_guest_upper_thresh, xenlog_guest_val);
 }
 
 static char *loglvl_str(int lvl)
@@ -731,9 +781,11 @@  static int parse_console_timestamps(const char *s)
     {
     case 0:
         opt_con_timestamp_mode = TSM_NONE;
+        update_con_timestamp_mode();
         return 0;
     case 1:
         opt_con_timestamp_mode = TSM_DATE;
+        update_con_timestamp_mode();
         return 0;
     }
     if ( *s == '\0' || /* Compat for old booleanparam() */
@@ -750,6 +802,8 @@  static int parse_console_timestamps(const char *s)
     else
         return -EINVAL;
 
+    update_con_timestamp_mode();
+
     return 0;
 }
 
diff --git a/xen/include/xen/hypfs.h b/xen/include/xen/hypfs.h
index 9ecc9060a3..6c1db290cb 100644
--- a/xen/include/xen/hypfs.h
+++ b/xen/include/xen/hypfs.h
@@ -99,5 +99,9 @@  int hypfs_write_leaf(struct hypfs_entry_leaf *leaf,
                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
 int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
+int hypfs_write_custom(struct hypfs_entry_leaf *leaf,
+                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
+void hypfs_write_lock(void);
+void hypfs_write_unlock(void);
 
 #endif /* __XEN_HYPFS_H__ */
diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h
index d4578cd27f..7184113751 100644
--- a/xen/include/xen/param.h
+++ b/xen/include/xen/param.h
@@ -1,6 +1,7 @@ 
 #ifndef _XEN_PARAM_H
 #define _XEN_PARAM_H
 
+#include <xen/hypfs.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/stdbool.h>
@@ -25,10 +26,18 @@  struct kernel_param {
     } par;
 };
 
+struct param_hypfs {
+    const struct kernel_param *param;
+    struct hypfs_entry_leaf hypfs;
+    void (*init_leaf)(struct param_hypfs *par);
+};
+
 extern const struct kernel_param __setup_start[], __setup_end[];
 extern const struct kernel_param __param_start[], __param_end[];
+extern struct param_hypfs __paramhypfs_start[], __paramhypfs_end[];
 
 #define __dataparam       __used_section(".data.param")
+#define __paramhypfs      __used_section(".data.paramhypfs")
 
 #define __param(att)      static const att \
     __attribute__((__aligned__(sizeof(void *)))) struct kernel_param
@@ -79,41 +88,94 @@  extern const struct kernel_param __param_start[], __param_end[];
           .type = OPT_IGNORE }
 
 #define __rtparam         __param(__dataparam)
+#define __paramfs         static __paramhypfs \
+    __attribute__((__aligned__(sizeof(void *)))) struct param_hypfs
 
-#define custom_runtime_only_param(_name, _var) \
+#define custom_runtime_set_var(parfs, var) \
+    { \
+        (parfs)->hypfs.write_ptr = &(var); \
+        (parfs)->hypfs.e.size = sizeof(var); \
+    }
+
+/* initfunc needs to set size and content, e.g. via custom_runtime_set_var(). */
+#define custom_runtime_only_param(_name, _var, initfunc) \
     __rtparam __rtpar_##_var = \
       { .name = _name, \
           .type = OPT_CUSTOM, \
-          .par.func = _var }
+          .par.func = _var }; \
+    __paramfs __parfs_##_var = \
+        { .param = &__rtpar_##_var, \
+          .init_leaf = initfunc, \
+          .hypfs.e.type = XEN_HYPFS_TYPE_STRING, \
+          .hypfs.e.encoding = XEN_HYPFS_ENC_PLAIN, \
+          .hypfs.e.name = _name, \
+          .hypfs.e.read = hypfs_read_leaf, \
+          .hypfs.e.write = hypfs_write_custom }
 #define boolean_runtime_only_param(_name, _var) \
     __rtparam __rtpar_##_var = \
         { .name = _name, \
           .type = OPT_BOOL, \
           .len = sizeof(_var) + \
                  BUILD_BUG_ON_ZERO(sizeof(_var) != sizeof(bool)), \
-          .par.var = &_var }
+          .par.var = &_var }; \
+    __paramfs __parfs_##_var = \
+        { .param = &__rtpar_##_var, \
+          .hypfs.e.type = XEN_HYPFS_TYPE_BOOL, \
+          .hypfs.e.encoding = XEN_HYPFS_ENC_PLAIN, \
+          .hypfs.e.name = _name, \
+          .hypfs.e.size = sizeof(_var), \
+          .hypfs.e.read = hypfs_read_leaf, \
+          .hypfs.e.write = hypfs_write_bool, \
+          .hypfs.content = &_var }
 #define integer_runtime_only_param(_name, _var) \
     __rtparam __rtpar_##_var = \
         { .name = _name, \
           .type = OPT_UINT, \
           .len = sizeof(_var), \
-          .par.var = &_var }
+          .par.var = &_var }; \
+    __paramfs __parfs_##_var = \
+        { .param = &__rtpar_##_var, \
+          .hypfs.e.type = XEN_HYPFS_TYPE_UINT, \
+          .hypfs.e.encoding = XEN_HYPFS_ENC_PLAIN, \
+          .hypfs.e.name = _name, \
+          .hypfs.e.size = sizeof(_var), \
+          .hypfs.e.read = hypfs_read_leaf, \
+          .hypfs.e.write = hypfs_write_leaf, \
+          .hypfs.content = &_var }
 #define size_runtime_only_param(_name, _var) \
     __rtparam __rtpar_##_var = \
         { .name = _name, \
           .type = OPT_SIZE, \
           .len = sizeof(_var), \
-          .par.var = &_var }
+          .par.var = &_var }; \
+    __paramfs __parfs_##_var = \
+        { .param = &__rtpar_##_var, \
+          .hypfs.e.type = XEN_HYPFS_TYPE_UINT, \
+          .hypfs.e.encoding = XEN_HYPFS_ENC_PLAIN, \
+          .hypfs.e.name = _name, \
+          .hypfs.e.size = sizeof(_var), \
+          .hypfs.e.read = hypfs_read_leaf, \
+          .hypfs.e.write = hypfs_write_leaf, \
+          .hypfs.content = &_var }
 #define string_runtime_only_param(_name, _var) \
     __rtparam __rtpar_##_var = \
         { .name = _name, \
           .type = OPT_STR, \
           .len = sizeof(_var), \
-          .par.var = &_var }
+          .par.var = &_var }; \
+    __paramfs __parfs_##_var = \
+        { .param = &__rtpar_##_var, \
+          .hypfs.e.type = XEN_HYPFS_TYPE_STRING, \
+          .hypfs.e.encoding = XEN_HYPFS_ENC_PLAIN, \
+          .hypfs.e.name = _name, \
+          .hypfs.e.size = sizeof(_var), \
+          .hypfs.e.read = hypfs_read_leaf, \
+          .hypfs.e.write = hypfs_write_leaf, \
+          .hypfs.content = &_var }
 
-#define custom_runtime_param(_name, _var) \
+#define custom_runtime_param(_name, _var, initfunc) \
     custom_param(_name, _var); \
-    custom_runtime_only_param(_name, _var)
+    custom_runtime_only_param(_name, _var, initfunc)
 #define boolean_runtime_param(_name, _var) \
     boolean_param(_name, _var); \
     boolean_runtime_only_param(_name, _var)