diff mbox

[V2] vm_event: Allow subscribing to write events for specific MSR-s

Message ID 1460524280-6902-1-git-send-email-rcojocaru@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Razvan Cojocaru April 13, 2016, 5:11 a.m. UTC
Previously, subscribing to MSR write events was an all-or-none
approach, with special cases for introspection MSR-s. This patch
allows the vm_event consumer to specify exactly what MSR-s it is
interested in, and as a side-effect gets rid of the
vmx_introspection_force_enabled_msrs[] special case.
This replaces the previously posted "xen: Filter out MSR write
events" patch.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

---
Changes since V1:
 - Removed ARM stubs.
 - Corrected domain_unpause(d) omission.
 - Moved enable / disable and query functions from vm_event.c to
   monitor.c.
---
 tools/libxc/include/xenctrl.h      |  4 +-
 tools/libxc/xc_monitor.c           |  6 +--
 xen/arch/x86/hvm/event.c           |  3 +-
 xen/arch/x86/hvm/hvm.c             |  3 +-
 xen/arch/x86/hvm/vmx/vmcs.c        | 26 ++-----------
 xen/arch/x86/hvm/vmx/vmx.c         | 10 ++---
 xen/arch/x86/monitor.c             | 79 ++++++++++++++++++++++++++++++++------
 xen/arch/x86/vm_event.c            | 10 +++++
 xen/include/asm-x86/domain.h       |  4 +-
 xen/include/asm-x86/hvm/hvm.h      |  8 ++--
 xen/include/asm-x86/hvm/vmx/vmcs.h |  7 ----
 xen/include/asm-x86/monitor.h      |  2 +
 xen/include/public/domctl.h        |  3 +-
 13 files changed, 99 insertions(+), 66 deletions(-)

Comments

Konrad Rzeszutek Wilk April 13, 2016, 9:47 a.m. UTC | #1
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 1fec412..4c96968 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -22,6 +22,58 @@
>  #include <asm/monitor.h>
>  #include <public/vm_event.h>
>  
> +static int arch_monitor_enable_msr(struct domain *d, u32 msr)
> +{
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return -EINVAL;

If this was not set, wouldn't we fail in vm_event_enable with -ENOMEM?

I presume the user can still make this hypercall..  Ah yes.

Perhaps -ENXIO?
> +
> +    if ( msr <= 0x1fff )
> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);

The 0x000/BYTES_PER_LONG looks odd. Is it even needed?

> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
> +    {
> +        msr &= 0x1fff;
> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
> +    }
> +
> +    hvm_enable_msr_interception(d, msr);

And for MSRs above 0xc0001fff it is OK to enable the interception?
Or between 0x1fff and 0xc0000000?

No need to filter them out? Or error on them?
> +
> +    return 0;
> +}
> +
> +static int arch_monitor_disable_msr(struct domain *d, u32 msr)
> +{
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return -EINVAL;
> +
> +    if ( msr <= 0x1fff )
> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
> +    {
> +        msr &= 0x1fff;
> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
> +    }
> +
> +    return 0;
> +}
> +
> +bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr)
> +{
> +    bool_t rc = 0;
> +
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return 0;
> +
> +    if ( msr <= 0x1fff )
> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
> +    {
> +        msr &= 0x1fff;
> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
> +    }

And what if msr requested is above 0xc0001fff ? What then?

> +
> +    return rc;
> +}
> +
>  int arch_monitor_domctl_event(struct domain *d,
>                                struct xen_domctl_monitor_op *mop)
>  {
> @@ -77,25 +129,28 @@ int arch_monitor_domctl_event(struct domain *d,
>  
>      case XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR:

Should this be renamed?
>      {
> -        bool_t old_status = ad->monitor.mov_to_msr_enabled;
> +        bool_t old_status;
> +        int rc;
> +        u32 msr = mop->u.mov_to_msr.msr;
>  
> -        if ( unlikely(old_status == requested_status) )
> -            return -EEXIST;
> +        domain_pause(d);
>  
> -        if ( requested_status && mop->u.mov_to_msr.extended_capture &&
> -             !hvm_enable_msr_exit_interception(d) )
> -            return -EOPNOTSUPP;
> +        old_status = arch_monitor_is_msr_enabled(d, msr);
>  
> -        domain_pause(d);
> +        if ( unlikely(old_status == requested_status) )
> +        {
> +            domain_unpause(d);
> +            return -EEXIST;
> +        }
>  
> -        if ( requested_status && mop->u.mov_to_msr.extended_capture )
> -            ad->monitor.mov_to_msr_extended = 1;
> +        if ( requested_status )
> +            rc = arch_monitor_enable_msr(d, msr);
>          else
> -            ad->monitor.mov_to_msr_extended = 0;
> +            rc = arch_monitor_disable_msr(d, msr);
>  
> -        ad->monitor.mov_to_msr_enabled = requested_status;
>          domain_unpause(d);
> -        break;
> +
> +        return rc;
>      }
>  
>      case XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP:
> diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
> index 5635603..9b4267e 100644
> --- a/xen/arch/x86/vm_event.c
> +++ b/xen/arch/x86/vm_event.c
> @@ -27,6 +27,13 @@ int vm_event_init_domain(struct domain *d)
>  {
>      struct vcpu *v;
>  
> +    d->arch.monitor_msr_bitmap = alloc_xenheap_page();

How about using vzalloc?
> +
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return -ENOMEM;
> +
> +    memset(d->arch.monitor_msr_bitmap, 0, PAGE_SIZE);

Then you don't have to do that.

> +
>      for_each_vcpu ( d, v )
>      {
>          if ( v->arch.vm_event )
> @@ -55,6 +62,9 @@ void vm_event_cleanup_domain(struct domain *d)
>          v->arch.vm_event = NULL;
>      }
>  
> +    free_xenheap_page(d->arch.monitor_msr_bitmap);

And this would be vfree.

> +    d->arch.monitor_msr_bitmap = NULL;
> +
>      d->arch.mem_access_emulate_each_rep = 0;
>      memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>      memset(&d->monitor, 0, sizeof(d->monitor));
> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> index d393ed2..d8d91c2 100644
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -398,12 +398,12 @@ struct arch_domain
>          unsigned int write_ctrlreg_enabled       : 4;
>          unsigned int write_ctrlreg_sync          : 4;
>          unsigned int write_ctrlreg_onchangeonly  : 4;
> -        unsigned int mov_to_msr_enabled          : 1;
> -        unsigned int mov_to_msr_extended         : 1;
>          unsigned int singlestep_enabled          : 1;
>          unsigned int software_breakpoint_enabled : 1;
>      } monitor;
>  
> +    unsigned long *monitor_msr_bitmap;
> +
>      /* Mem_access emulation control */
>      bool_t mem_access_emulate_each_rep;
>  
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 7b7ff3f..9d1c0ef 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -211,7 +211,7 @@ struct hvm_function_table {
>                                    uint32_t *eax, uint32_t *ebx,
>                                    uint32_t *ecx, uint32_t *edx);
>  
> -    void (*enable_msr_exit_interception)(struct domain *d);
> +    void (*enable_msr_interception)(struct domain *d, uint32_t msr);
>      bool_t (*is_singlestep_supported)(void);
>      int (*set_mode)(struct vcpu *v, int mode);
>  
> @@ -565,11 +565,11 @@ static inline enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v)
>      return hvm_funcs.nhvm_intr_blocked(v);
>  }
>  
> -static inline bool_t hvm_enable_msr_exit_interception(struct domain *d)
> +static inline bool_t hvm_enable_msr_interception(struct domain *d, uint32_t msr)
>  {
> -    if ( hvm_funcs.enable_msr_exit_interception )
> +    if ( hvm_funcs.enable_msr_interception )
>      {
> -        hvm_funcs.enable_msr_exit_interception(d);
> +        hvm_funcs.enable_msr_interception(d, msr);
>          return 1;
>      }
>  
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index b54f52f..7bf5326 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -562,13 +562,6 @@ enum vmcs_field {
>      HOST_RIP                        = 0x00006c16,
>  };
>  
> -/*
> - * A set of MSR-s that need to be enabled for memory introspection
> - * to work.
> - */
> -extern const u32 vmx_introspection_force_enabled_msrs[];
> -extern const unsigned int vmx_introspection_force_enabled_msrs_size;
> -
>  #define VMCS_VPID_WIDTH 16
>  
>  #define MSR_TYPE_R 1
> diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
> index 0954b59..74e5b1b 100644
> --- a/xen/include/asm-x86/monitor.h
> +++ b/xen/include/asm-x86/monitor.h
> @@ -50,4 +50,6 @@ int arch_monitor_domctl_op(struct domain *d, struct xen_domctl_monitor_op *mop)
>  int arch_monitor_domctl_event(struct domain *d,
>                                struct xen_domctl_monitor_op *mop);
>  
> +bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr);
> +
>  #endif /* __ASM_X86_MONITOR_H__ */
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 2457698..875c09a 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
>          } mov_to_cr;
>  
>          struct {
> -            /* Enable the capture of an extended set of MSRs */
> -            uint8_t extended_capture;
> +            uint32_t msr;

Whoa there. Isn't it expanding the structure? Will this be backwards
compatible? What if somebody is using an older version of xen-access
against this hypervisor? Will they work?

Perhaps this should have a new struct / sub-ops? And the old
'mov_to_msr' will just re-use this new fangled code?


>          } mov_to_msr;
>  
>          struct {
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
Andrew Cooper April 13, 2016, 10:07 a.m. UTC | #2
On 13/04/16 10:47, Konrad Rzeszutek Wilk wrote:
>> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
>> index 1fec412..4c96968 100644
>> --- a/xen/arch/x86/monitor.c
>> +++ b/xen/arch/x86/monitor.c
>> @@ -22,6 +22,58 @@
>>  #include <asm/monitor.h>
>>  #include <public/vm_event.h>
>>  
>> +static int arch_monitor_enable_msr(struct domain *d, u32 msr)
>> +{
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return -EINVAL;
> If this was not set, wouldn't we fail in vm_event_enable with -ENOMEM?
>
> I presume the user can still make this hypercall..  Ah yes.
>
> Perhaps -ENXIO?
>> +
>> +    if ( msr <= 0x1fff )
>> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);

(It might help to read the following review before coming back here...)

It might be clearer to express monitor_msr_bitmap as a pointer to

struct monitor_msr_bitmap {
    uint8_t low[1024];
    uint8_t hypervisor[1024];
    uint8_t high[1024];
};

which avoids the odd pointer arithmetic.

> The 0x000/BYTES_PER_LONG looks odd. Is it even needed?
>
>> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
>> +    {
>> +        msr &= 0x1fff;
>> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);

__set_bit().  I don't think you need a LOCK here.

>> +    }
>> +
>> +    hvm_enable_msr_interception(d, msr);
> And for MSRs above 0xc0001fff it is OK to enable the interception?
> Or between 0x1fff and 0xc0000000?

No real MSRs exist outside the [0..0x1fff] and [0xc0000000..0xc0001fff]
ranges, so accesses outside them will suffer a #GP.  This is even reflected in how both VT-x
and SVM do their MSR interception bitmap, which is why I specifically
suggested using the same here.

However, this case also wants the hypervisor range [0x40000000..0x40001fff].

>
> No need to filter them out? Or error on them?
>> +
>> +    return 0;
>> +}
>> +
>> +static int arch_monitor_disable_msr(struct domain *d, u32 msr)
>> +{
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return -EINVAL;
>> +
>> +    if ( msr <= 0x1fff )
>> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
>> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
>> +    {
>> +        msr &= 0x1fff;
>> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr)
>> +{
>> +    bool_t rc = 0;
>> +
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return 0;
>> +
>> +    if ( msr <= 0x1fff )
>> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
>> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
>> +    {
>> +        msr &= 0x1fff;
>> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
>> +    }
> And what if msr requested is above 0xc0001fff ? What then?
>
>> +
>> +    return rc;
>> +}
>> +
>>  int arch_monitor_domctl_event(struct domain *d,
>>                                struct xen_domctl_monitor_op *mop)
>>  {
>> @@ -77,25 +129,28 @@ int arch_monitor_domctl_event(struct domain *d,
>>  
>>      case XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR:
> Should this be renamed?
>>      {
>> -        bool_t old_status = ad->monitor.mov_to_msr_enabled;
>> +        bool_t old_status;
>> +        int rc;
>> +        u32 msr = mop->u.mov_to_msr.msr;
>>  
>> -        if ( unlikely(old_status == requested_status) )
>> -            return -EEXIST;
>> +        domain_pause(d);
>>  
>> -        if ( requested_status && mop->u.mov_to_msr.extended_capture &&
>> -             !hvm_enable_msr_exit_interception(d) )
>> -            return -EOPNOTSUPP;
>> +        old_status = arch_monitor_is_msr_enabled(d, msr);
>>  
>> -        domain_pause(d);
>> +        if ( unlikely(old_status == requested_status) )
>> +        {
>> +            domain_unpause(d);
>> +            return -EEXIST;
>> +        }
>>  
>> -        if ( requested_status && mop->u.mov_to_msr.extended_capture )
>> -            ad->monitor.mov_to_msr_extended = 1;
>> +        if ( requested_status )
>> +            rc = arch_monitor_enable_msr(d, msr);
>>          else
>> -            ad->monitor.mov_to_msr_extended = 0;
>> +            rc = arch_monitor_disable_msr(d, msr);
>>  
>> -        ad->monitor.mov_to_msr_enabled = requested_status;
>>          domain_unpause(d);
>> -        break;
>> +
>> +        return rc;
>>      }
>>  
>>      case XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP:
>> diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
>> index 5635603..9b4267e 100644
>> --- a/xen/arch/x86/vm_event.c
>> +++ b/xen/arch/x86/vm_event.c
>> @@ -27,6 +27,13 @@ int vm_event_init_domain(struct domain *d)
>>  {
>>      struct vcpu *v;
>>  
>> +    d->arch.monitor_msr_bitmap = alloc_xenheap_page();
> How about using vzalloc?

vmap space is far more limited than general xenheap space.  vmap()
should only be used when you need >4K allocations contiguously in
virtual address space.

>> +
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return -ENOMEM;
>> +
>> +    memset(d->arch.monitor_msr_bitmap, 0, PAGE_SIZE);
> Then you don't have to do that.

clear_page()

>
>> +
>>      for_each_vcpu ( d, v )
>>      {
>>          if ( v->arch.vm_event )
>> @@ -55,6 +62,9 @@ void vm_event_cleanup_domain(struct domain *d)
>>          v->arch.vm_event = NULL;
>>      }
>>  
>> +    free_xenheap_page(d->arch.monitor_msr_bitmap);
> And this would be vfree.
>
>> +    d->arch.monitor_msr_bitmap = NULL;
>> +
>>      d->arch.mem_access_emulate_each_rep = 0;
>>      memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>>      memset(&d->monitor, 0, sizeof(d->monitor));
>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>> index 2457698..875c09a 100644
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
>>          } mov_to_cr;
>>  
>>          struct {
>> -            /* Enable the capture of an extended set of MSRs */
>> -            uint8_t extended_capture;
>> +            uint32_t msr;
> Whoa there. Isn't it expanding the structure? Will this be backwards
> compatible? What if somebody is using an older version of xen-access
> against this hypervisor? Will they work?

It's a domctl.  This is perfectly fine (within the rules) to do.

~Andrew
Razvan Cojocaru April 13, 2016, 11:57 a.m. UTC | #3
On 04/13/2016 12:47 PM, Konrad Rzeszutek Wilk wrote:
>> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
>> index 1fec412..4c96968 100644
>> --- a/xen/arch/x86/monitor.c
>> +++ b/xen/arch/x86/monitor.c
>> @@ -22,6 +22,58 @@
>>  #include <asm/monitor.h>
>>  #include <public/vm_event.h>
>>  
>> +static int arch_monitor_enable_msr(struct domain *d, u32 msr)
>> +{
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return -EINVAL;
> 
> If this was not set, wouldn't we fail in vm_event_enable with -ENOMEM?
> 
> I presume the user can still make this hypercall..  Ah yes.
> 
> Perhaps -ENXIO?

Sure, I can return -ENXIO. I just thought -EINVAL reflects the case
well: it's not right to call this hypercall if you haven't subscribed
for vm_events beforehand (in which case d->arch.monitor_msr_bitmap is
NULL, because it's only allocated then, and de-allocated again when the
subscriber unsubscribes).

>> +
>> +    if ( msr <= 0x1fff )
>> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
> 
> The 0x000/BYTES_PER_LONG looks odd. Is it even needed?

I've pretty much copied the code from the enabled msrs bitmap, so I
assume it was, but I'll change the code to follow Andrew Cooper's
suggestion which should make this go away.

>> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
>> +    {
>> +        msr &= 0x1fff;
>> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
>> +    }
>> +
>> +    hvm_enable_msr_interception(d, msr);
> 
> And for MSRs above 0xc0001fff it is OK to enable the interception?
> Or between 0x1fff and 0xc0000000?
> 
> No need to filter them out? Or error on them?
>> +
>> +    return 0;
>> +}
>> +
>> +static int arch_monitor_disable_msr(struct domain *d, u32 msr)
>> +{
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return -EINVAL;
>> +
>> +    if ( msr <= 0x1fff )
>> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
>> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
>> +    {
>> +        msr &= 0x1fff;
>> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr)
>> +{
>> +    bool_t rc = 0;
>> +
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return 0;
>> +
>> +    if ( msr <= 0x1fff )
>> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
>> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
>> +    {
>> +        msr &= 0x1fff;
>> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
>> +    }
> 
> And what if msr requested is above 0xc0001fff ? What then?

I think the questions above have been answered by Andrew Cooper.

>> +
>> +    return rc;
>> +}
>> +
>>  int arch_monitor_domctl_event(struct domain *d,
>>                                struct xen_domctl_monitor_op *mop)
>>  {
>> @@ -77,25 +129,28 @@ int arch_monitor_domctl_event(struct domain *d,
>>  
>>      case XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR:
> 
> Should this be renamed?

I'm happy to rename it, but I don't think it should be - it has the exact
same semantics as before: monitor an MSR write.

>>      {
>> -        bool_t old_status = ad->monitor.mov_to_msr_enabled;
>> +        bool_t old_status;
>> +        int rc;
>> +        u32 msr = mop->u.mov_to_msr.msr;
>>  
>> -        if ( unlikely(old_status == requested_status) )
>> -            return -EEXIST;
>> +        domain_pause(d);
>>  
>> -        if ( requested_status && mop->u.mov_to_msr.extended_capture &&
>> -             !hvm_enable_msr_exit_interception(d) )
>> -            return -EOPNOTSUPP;
>> +        old_status = arch_monitor_is_msr_enabled(d, msr);
>>  
>> -        domain_pause(d);
>> +        if ( unlikely(old_status == requested_status) )
>> +        {
>> +            domain_unpause(d);
>> +            return -EEXIST;
>> +        }
>>  
>> -        if ( requested_status && mop->u.mov_to_msr.extended_capture )
>> -            ad->monitor.mov_to_msr_extended = 1;
>> +        if ( requested_status )
>> +            rc = arch_monitor_enable_msr(d, msr);
>>          else
>> -            ad->monitor.mov_to_msr_extended = 0;
>> +            rc = arch_monitor_disable_msr(d, msr);
>>  
>> -        ad->monitor.mov_to_msr_enabled = requested_status;
>>          domain_unpause(d);
>> -        break;
>> +
>> +        return rc;
>>      }
>>  
>>      case XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP:
>> diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
>> index 5635603..9b4267e 100644
>> --- a/xen/arch/x86/vm_event.c
>> +++ b/xen/arch/x86/vm_event.c
>> @@ -27,6 +27,13 @@ int vm_event_init_domain(struct domain *d)
>>  {
>>      struct vcpu *v;
>>  
>> +    d->arch.monitor_msr_bitmap = alloc_xenheap_page();
> 
> How about using vzalloc?
>> +
>> +    if ( !d->arch.monitor_msr_bitmap )
>> +        return -ENOMEM;
>> +
>> +    memset(d->arch.monitor_msr_bitmap, 0, PAGE_SIZE);
> 
> Then you don't have to do that.
> 
>> +
>>      for_each_vcpu ( d, v )
>>      {
>>          if ( v->arch.vm_event )
>> @@ -55,6 +62,9 @@ void vm_event_cleanup_domain(struct domain *d)
>>          v->arch.vm_event = NULL;
>>      }
>>  
>> +    free_xenheap_page(d->arch.monitor_msr_bitmap);
> 
> And this would be vfree.

I'll follow Andrew Cooper's requests here, which should address these
issues.

>> +    d->arch.monitor_msr_bitmap = NULL;
>> +
>>      d->arch.mem_access_emulate_each_rep = 0;
>>      memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>>      memset(&d->monitor, 0, sizeof(d->monitor));
>> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
>> index d393ed2..d8d91c2 100644
>> --- a/xen/include/asm-x86/domain.h
>> +++ b/xen/include/asm-x86/domain.h
>> @@ -398,12 +398,12 @@ struct arch_domain
>>          unsigned int write_ctrlreg_enabled       : 4;
>>          unsigned int write_ctrlreg_sync          : 4;
>>          unsigned int write_ctrlreg_onchangeonly  : 4;
>> -        unsigned int mov_to_msr_enabled          : 1;
>> -        unsigned int mov_to_msr_extended         : 1;
>>          unsigned int singlestep_enabled          : 1;
>>          unsigned int software_breakpoint_enabled : 1;
>>      } monitor;
>>  
>> +    unsigned long *monitor_msr_bitmap;
>> +
>>      /* Mem_access emulation control */
>>      bool_t mem_access_emulate_each_rep;
>>  
>> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
>> index 7b7ff3f..9d1c0ef 100644
>> --- a/xen/include/asm-x86/hvm/hvm.h
>> +++ b/xen/include/asm-x86/hvm/hvm.h
>> @@ -211,7 +211,7 @@ struct hvm_function_table {
>>                                    uint32_t *eax, uint32_t *ebx,
>>                                    uint32_t *ecx, uint32_t *edx);
>>  
>> -    void (*enable_msr_exit_interception)(struct domain *d);
>> +    void (*enable_msr_interception)(struct domain *d, uint32_t msr);
>>      bool_t (*is_singlestep_supported)(void);
>>      int (*set_mode)(struct vcpu *v, int mode);
>>  
>> @@ -565,11 +565,11 @@ static inline enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v)
>>      return hvm_funcs.nhvm_intr_blocked(v);
>>  }
>>  
>> -static inline bool_t hvm_enable_msr_exit_interception(struct domain *d)
>> +static inline bool_t hvm_enable_msr_interception(struct domain *d, uint32_t msr)
>>  {
>> -    if ( hvm_funcs.enable_msr_exit_interception )
>> +    if ( hvm_funcs.enable_msr_interception )
>>      {
>> -        hvm_funcs.enable_msr_exit_interception(d);
>> +        hvm_funcs.enable_msr_interception(d, msr);
>>          return 1;
>>      }
>>  
>> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
>> index b54f52f..7bf5326 100644
>> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
>> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
>> @@ -562,13 +562,6 @@ enum vmcs_field {
>>      HOST_RIP                        = 0x00006c16,
>>  };
>>  
>> -/*
>> - * A set of MSR-s that need to be enabled for memory introspection
>> - * to work.
>> - */
>> -extern const u32 vmx_introspection_force_enabled_msrs[];
>> -extern const unsigned int vmx_introspection_force_enabled_msrs_size;
>> -
>>  #define VMCS_VPID_WIDTH 16
>>  
>>  #define MSR_TYPE_R 1
>> diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
>> index 0954b59..74e5b1b 100644
>> --- a/xen/include/asm-x86/monitor.h
>> +++ b/xen/include/asm-x86/monitor.h
>> @@ -50,4 +50,6 @@ int arch_monitor_domctl_op(struct domain *d, struct xen_domctl_monitor_op *mop)
>>  int arch_monitor_domctl_event(struct domain *d,
>>                                struct xen_domctl_monitor_op *mop);
>>  
>> +bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr);
>> +
>>  #endif /* __ASM_X86_MONITOR_H__ */
>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>> index 2457698..875c09a 100644
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
>>          } mov_to_cr;
>>  
>>          struct {
>> -            /* Enable the capture of an extended set of MSRs */
>> -            uint8_t extended_capture;
>> +            uint32_t msr;
> 
> Whoa there. Isn't it expanding the structure? Will this be backwards
> compatible? What if somebody is using an older version of xen-access
> against this hypervisor? Will they work?
> 
> Perhaps this should have a new struct / sub-ops? And the old
> 'mov_to_msr' will just re-use this new fangled code?

In addition to Andrew's comments, I think simply changing
VM_EVENT_INTERFACE_VERSION should be enough for xen-access-like clients
to figure out the incompatibility.


Thanks,
Razvan
Tamas K Lengyel April 13, 2016, 2:50 p.m. UTC | #4
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 1fec412..4c96968 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -22,6 +22,58 @@
>  #include <asm/monitor.h>
>  #include <public/vm_event.h>
>
> +static int arch_monitor_enable_msr(struct domain *d, u32 msr)
>

IMHO there is no need to prepend the function names here with arch_ as
these are x86 specific so there never will be ARM equivalent.


> +{
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return -EINVAL;
> +
> +    if ( msr <= 0x1fff )
> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
> +    {
> +        msr &= 0x1fff;
> +        set_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
> +    }
> +
> +    hvm_enable_msr_interception(d, msr);
> +
> +    return 0;
> +}
> +
> +static int arch_monitor_disable_msr(struct domain *d, u32 msr)
> +{
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return -EINVAL;
> +
> +    if ( msr <= 0x1fff )
> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
> +    {
> +        msr &= 0x1fff;
> +        clear_bit(msr, d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
> +    }
> +
> +    return 0;
> +}
> +
> +bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr)
> +{
> +    bool_t rc = 0;
> +
> +    if ( !d->arch.monitor_msr_bitmap )
> +        return 0;
> +
> +    if ( msr <= 0x1fff )
> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap +
> 0x000/BYTES_PER_LONG);
> +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
> +    {
> +        msr &= 0x1fff;
> +        rc = test_bit(msr, d->arch.monitor_msr_bitmap +
> 0x400/BYTES_PER_LONG);
> +    }
> +
> +    return rc;
> +}
> +
>  int arch_monitor_domctl_event(struct domain *d,
>                                struct xen_domctl_monitor_op *mop)
>  {
Razvan Cojocaru April 13, 2016, 2:52 p.m. UTC | #5
On 04/13/2016 05:50 PM, Tamas K Lengyel wrote:
> 
>     diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
>     index 1fec412..4c96968 100644
>     --- a/xen/arch/x86/monitor.c
>     +++ b/xen/arch/x86/monitor.c
>     @@ -22,6 +22,58 @@
>      #include <asm/monitor.h>
>      #include <public/vm_event.h>
> 
>     +static int arch_monitor_enable_msr(struct domain *d, u32 msr)
> 
> 
> IMHO there is no need to prepend the function names here with arch_ as
> these are x86 specific so there never will be ARM equivalent.

That's true. I'll remove the prefix.


Thanks,
Razvan
Tamas K Lengyel April 13, 2016, 2:52 p.m. UTC | #6
> >> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> >> index 2457698..875c09a 100644
> >> --- a/xen/include/public/domctl.h
> >> +++ b/xen/include/public/domctl.h
> >> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
> >>          } mov_to_cr;
> >>
> >>          struct {
> >> -            /* Enable the capture of an extended set of MSRs */
> >> -            uint8_t extended_capture;
> >> +            uint32_t msr;
> >
> > Whoa there. Isn't it expanding the structure? Will this be backwards
> > compatible? What if somebody is using an older version of xen-access
> > against this hypervisor? Will they work?
> >
> > Perhaps this should have a new struct / sub-ops? And the old
> > 'mov_to_msr' will just re-use this new fangled code?
>
> In addition to Andrew's comments, I think simply changing
> VM_EVENT_INTERFACE_VERSION should be enough for xen-access-like clients
> to figure out the incompatibility.
>


This is an independent system from VM_EVENT, so IMHO the two shouldn't be
mixed. The union size right now is 24-bits so if a uint16_t is enough for
the bitmask that should be used instead. That way we don't end up growing
the struct size.

Tamas
Razvan Cojocaru April 13, 2016, 2:56 p.m. UTC | #7
On 04/13/2016 05:52 PM, Tamas K Lengyel wrote:
> 
>     >> diff --git a/xen/include/public/domctl.h
>     b/xen/include/public/domctl.h
>     >> index 2457698..875c09a 100644
>     >> --- a/xen/include/public/domctl.h
>     >> +++ b/xen/include/public/domctl.h
>     >> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
>     >>          } mov_to_cr;
>     >>
>     >>          struct {
>     >> -            /* Enable the capture of an extended set of MSRs */
>     >> -            uint8_t extended_capture;
>     >> +            uint32_t msr;
>     >
>     > Whoa there. Isn't it expanding the structure? Will this be backwards
>     > compatible? What if somebody is using an older version of xen-access
>     > against this hypervisor? Will they work?
>     >
>     > Perhaps this should have a new struct / sub-ops? And the old
>     > 'mov_to_msr' will just re-use this new fangled code?
> 
>     In addition to Andrew's comments, I think simply changing
>     VM_EVENT_INTERFACE_VERSION should be enough for xen-access-like clients
>     to figure out the incompatibility.
> 
> 
> 
> This is an independent system from VM_EVENT, so IMHO the two shouldn't
> be mixed. The union size right now is 24-bits so if a uint16_t is enough
> for the bitmask that should be used instead. That way we don't end up
> growing the struct size.

Right. Well, MSR-s seem to be passed around as 32-bit unsigned integers
everywhere in the Xen source code, so unless that also needs correcting
then unfortunately it'll have to grow.


Thanks,
Razvan
Andrew Cooper April 13, 2016, 3:01 p.m. UTC | #8
On 13/04/16 15:56, Razvan Cojocaru wrote:
> On 04/13/2016 05:52 PM, Tamas K Lengyel wrote:
>>     >> diff --git a/xen/include/public/domctl.h
>>     b/xen/include/public/domctl.h
>>     >> index 2457698..875c09a 100644
>>     >> --- a/xen/include/public/domctl.h
>>     >> +++ b/xen/include/public/domctl.h
>>     >> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
>>     >>          } mov_to_cr;
>>     >>
>>     >>          struct {
>>     >> -            /* Enable the capture of an extended set of MSRs */
>>     >> -            uint8_t extended_capture;
>>     >> +            uint32_t msr;
>>     >
>>     > Whoa there. Isn't it expanding the structure? Will this be backwards
>>     > compatible? What if somebody is using an older version of xen-access
>>     > against this hypervisor? Will they work?
>>     >
>>     > Perhaps this should have a new struct / sub-ops? And the old
>>     > 'mov_to_msr' will just re-use this new fangled code?
>>
>>     In addition to Andrew's comments, I think simply changing
>>     VM_EVENT_INTERFACE_VERSION should be enough for xen-access-like clients
>>     to figure out the incompatibility.
>>
>>
>>
>> This is an independent system from VM_EVENT, so IMHO the two shouldn't
>> be mixed. The union size right now is 24-bits so if a uint16_t is enough
>> for the bitmask that should be used instead. That way we don't end up
>> growing the struct size.
> Right. Well, MSR-s seem to be passed around as 32-bit unsigned integers
> everywhere in the Xen source code, so unless that also needs correcting
> then unfortunately it'll have to grow.

MSR indices are always 32bits wide, as they live specifically in %ecx
when encoded for instructions.

Only 2K MSRs are currently specified in hardware, with some extra ones
in the hypervisor range, but this doesn't mean that list won't grow in
the future.

~Andrew
Tamas K Lengyel April 13, 2016, 3:05 p.m. UTC | #9
On Wed, Apr 13, 2016 at 9:01 AM, Andrew Cooper <andrew.cooper3@citrix.com>
wrote:

> On 13/04/16 15:56, Razvan Cojocaru wrote:
> > On 04/13/2016 05:52 PM, Tamas K Lengyel wrote:
> >>     >> diff --git a/xen/include/public/domctl.h
> >>     b/xen/include/public/domctl.h
> >>     >> index 2457698..875c09a 100644
> >>     >> --- a/xen/include/public/domctl.h
> >>     >> +++ b/xen/include/public/domctl.h
> >>     >> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
> >>     >>          } mov_to_cr;
> >>     >>
> >>     >>          struct {
> >>     >> -            /* Enable the capture of an extended set of MSRs */
> >>     >> -            uint8_t extended_capture;
> >>     >> +            uint32_t msr;
> >>     >
> >>     > Whoa there. Isn't it expanding the structure? Will this be
> backwards
> >>     > compatible? What if somebody is using an older version of
> xen-access
> >>     > against this hypervisor? Will they work?
> >>     >
> >>     > Perhaps this should have a new struct / sub-ops? And the old
> >>     > 'mov_to_msr' will just re-use this new fangled code?
> >>
> >>     In addition to Andrew's comments, I think simply changing
> >>     VM_EVENT_INTERFACE_VERSION should be enough for xen-access-like
> clients
> >>     to figure out the incompatibility.
> >>
> >>
> >>
> >> This is an independent system from VM_EVENT, so IMHO the two shouldn't
> >> be mixed. The union size right now is 24-bits so if a uint16_t is enough
> >> for the bitmask that should be used instead. That way we don't end up
> >> growing the struct size.
> > Right. Well, MSR-s seem to be passed around as 32-bit unsigned integers
> > everywhere in the Xen source code, so unless that also needs correcting
> > then unfortunately it'll have to grow.
>
> MSR indices are always 32bits wide, as they live specifically in %ecx
> when encoded for instructions.
>
> Only 2K MSRs are currently specified in hardware, with some extra ones
> in the hypervisor range, but this doesn't mean that list won't grow in
> the future.
>

Yeah, well then we need to introduce a new struct with a new sub-op to pass
the bitmask. I guess it's a lesson in ABI design to leave some wiggle room
for future-proofing (my bad). So I guess we can introduce
XEN_DOMCTL_MONITOR_OP_ENABLE_V2 and struct xen_domctl_monitor_op_v2, where
we, say, expand the union to uint64_t just in case?

Tamas
Razvan Cojocaru April 14, 2016, 9:37 a.m. UTC | #10
On 04/13/2016 06:05 PM, Tamas K Lengyel wrote:
> 
> 
> On Wed, Apr 13, 2016 at 9:01 AM, Andrew Cooper
> <andrew.cooper3@citrix.com <mailto:andrew.cooper3@citrix.com>> wrote:
> 
>     On 13/04/16 15:56, Razvan Cojocaru wrote:
>     > On 04/13/2016 05:52 PM, Tamas K Lengyel wrote:
>     >>     >> diff --git a/xen/include/public/domctl.h
>     >>     b/xen/include/public/domctl.h
>     >>     >> index 2457698..875c09a 100644
>     >>     >> --- a/xen/include/public/domctl.h
>     >>     >> +++ b/xen/include/public/domctl.h
>     >>     >> @@ -1107,8 +1107,7 @@ struct xen_domctl_monitor_op {
>     >>     >>          } mov_to_cr;
>     >>     >>
>     >>     >>          struct {
>     >>     >> -            /* Enable the capture of an extended set of
>     MSRs */
>     >>     >> -            uint8_t extended_capture;
>     >>     >> +            uint32_t msr;
>     >>     >
>     >>     > Whoa there. Isn't it expanding the structure? Will this be
>     backwards
>     >>     > compatible? What if somebody is using an older version of
>     xen-access
>     >>     > against this hypervisor? Will they work?
>     >>     >
>     >>     > Perhaps this should have a new struct / sub-ops? And the old
>     >>     > 'mov_to_msr' will just re-use this new fangled code?
>     >>
>     >>     In addition to Andrew's comments, I think simply changing
>     >>     VM_EVENT_INTERFACE_VERSION should be enough for
>     xen-access-like clients
>     >>     to figure out the incompatibility.
>     >>
>     >>
>     >>
>     >> This is an independent system from VM_EVENT, so IMHO the two
>     shouldn't
>     >> be mixed. The union size right now is 24-bits so if a uint16_t is
>     enough
>     >> for the bitmask that should be used instead. That way we don't end up
>     >> growing the struct size.
>     > Right. Well, MSR-s seem to be passed around as 32-bit unsigned
>     integers
>     > everywhere in the Xen source code, so unless that also needs
>     correcting
>     > then unfortunately it'll have to grow.
> 
>     MSR indices are always 32bits wide, as they live specifically in %ecx
>     when encoded for instructions.
> 
>     Only 2K MSRs are currently specified in hardware, with some extra ones
>     in the hypervisor range, but this doesn't mean that list won't grow in
>     the future.
> 
> 
> Yea, well then we need to introduce a new struct with a new subop to
> pass the bitmask. I guess its a lesson in ABI design to leave some
> wiggle room for future-proofing it (my bad). So I guess we can introduce
> XEN_DOMCTL_MONITOR_OP_ENABLE_V2 and struct xen_domctl_monitor_op_v2
> where say expand the union to uint64_t just in case?

I can do that, but it would seem that this is somewhat at odds with
Andrew Cooper's perspective - he has stated that it's within the rules
and the domctl can be changed without there being the need for
XEN_DOMCTL_MONITOR_OP_ENABLE_V2. So this should be clarified, please,
otherwise I'm incurring the risk of changing the code only to have to
revert it later.


Thanks,
Razvan
Jan Beulich April 14, 2016, 3:20 p.m. UTC | #11
>>> Razvan Cojocaru <rcojocaru@bitdefender.com> 04/14/16 11:37 AM >>>
>On 04/13/2016 06:05 PM, Tamas K Lengyel wrote:
>> 
>> Yea, well then we need to introduce a new struct with a new subop to
>> pass the bitmask. I guess its a lesson in ABI design to leave some
>> wiggle room for future-proofing it (my bad). So I guess we can introduce
>> XEN_DOMCTL_MONITOR_OP_ENABLE_V2 and struct xen_domctl_monitor_op_v2
>> where say expand the union to uint64_t just in case?
>
>I can do that, but it would seem that this is somewhat at odds with
>Andrew Cooper's perspective - he has stated that it's within the rules
>and the domctl can be changed without there being the need for
>XEN_DOMCTL_MONITOR_OP_ENABLE_V2. So this should be clarified, please,
>otherwise I'm incurring the risk of changing the code only to have to
>revert it later.

You basically have two options - the new sub-op or changing the existing
one while (if not already done so in a dev cycle) bumping the domctl
interface version.

Jan
Tamas K Lengyel April 14, 2016, 3:33 p.m. UTC | #12
On Thu, Apr 14, 2016 at 9:20 AM, Jan Beulich <jbeulich@suse.com> wrote:

> >>> Razvan Cojocaru <rcojocaru@bitdefender.com> 04/14/16 11:37 AM >>>
> >On 04/13/2016 06:05 PM, Tamas K Lengyel wrote:
> >>
> >> Yea, well then we need to introduce a new struct with a new subop to
> >> pass the bitmask. I guess its a lesson in ABI design to leave some
> >> wiggle room for future-proofing it (my bad). So I guess we can introduce
> >> XEN_DOMCTL_MONITOR_OP_ENABLE_V2 and struct xen_domctl_monitor_op_v2
> >> where say expand the union to uint64_t just in case?
> >
> >I can do that, but it would seem that this is somewhat at odds with
> >Andrew Cooper's perspective - he has stated that it's within the rules
> >and the domctl can be changed without there being the need for
> >XEN_DOMCTL_MONITOR_OP_ENABLE_V2. So this should be clarified, please,
> >otherwise I'm incurring the risk of changing the code only to have to
> >revert it later.
>
> You basically have two options - the new sub-op or changing the existing
> one while (if not already done so in a dev cycle) bumping the domctl
> interface version.


If bumping the domctl version is not too much hassle I think that would be
the easiest.

Tamas
Razvan Cojocaru April 14, 2016, 3:37 p.m. UTC | #13
On 04/14/2016 06:33 PM, Tamas K Lengyel wrote:
> 
> 
> On Thu, Apr 14, 2016 at 9:20 AM, Jan Beulich <jbeulich@suse.com
> <mailto:jbeulich@suse.com>> wrote:
> 
>     >>> Razvan Cojocaru <rcojocaru@bitdefender.com
>     <mailto:rcojocaru@bitdefender.com>> 04/14/16 11:37 AM >>>
>     >On 04/13/2016 06:05 PM, Tamas K Lengyel wrote:
>     >>
>     >> Yea, well then we need to introduce a new struct with a new subop to
>     >> pass the bitmask. I guess its a lesson in ABI design to leave some
>     >> wiggle room for future-proofing it (my bad). So I guess we can introduce
>     >> XEN_DOMCTL_MONITOR_OP_ENABLE_V2 and struct xen_domctl_monitor_op_v2
>     >> where say expand the union to uint64_t just in case?
>     >
>     >I can do that, but it would seem that this is somewhat at odds with
>     >Andrew Cooper's perspective - he has stated that it's within the rules
>     >and the domctl can be changed without there being the need for
>     >XEN_DOMCTL_MONITOR_OP_ENABLE_V2. So this should be clarified, please,
>     >otherwise I'm incurring the risk of changing the code only to have to
>     >revert it later.
> 
>     You basically have two options - the new sub-op or changing the existing
>     one while (if not already done so in a dev cycle) bumping the domctl
>     interface version.
> 
> 
> If bumping the domctl version is not too much hassle I think that would
> be the easiest.

Fair enough, I'll look into that option then.


Thanks,
Razvan
diff mbox

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index f5a034a..9698d46 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2183,8 +2183,8 @@  int xc_monitor_get_capabilities(xc_interface *xch, domid_t domain_id,
 int xc_monitor_write_ctrlreg(xc_interface *xch, domid_t domain_id,
                              uint16_t index, bool enable, bool sync,
                              bool onchangeonly);
-int xc_monitor_mov_to_msr(xc_interface *xch, domid_t domain_id, bool enable,
-                          bool extended_capture);
+int xc_monitor_mov_to_msr(xc_interface *xch, domid_t domain_id, uint32_t msr,
+                          bool enable);
 int xc_monitor_singlestep(xc_interface *xch, domid_t domain_id, bool enable);
 int xc_monitor_software_breakpoint(xc_interface *xch, domid_t domain_id,
                                    bool enable);
diff --git a/tools/libxc/xc_monitor.c b/tools/libxc/xc_monitor.c
index b1705dd..78131b2 100644
--- a/tools/libxc/xc_monitor.c
+++ b/tools/libxc/xc_monitor.c
@@ -86,8 +86,8 @@  int xc_monitor_write_ctrlreg(xc_interface *xch, domid_t domain_id,
     return do_domctl(xch, &domctl);
 }
 
-int xc_monitor_mov_to_msr(xc_interface *xch, domid_t domain_id, bool enable,
-                          bool extended_capture)
+int xc_monitor_mov_to_msr(xc_interface *xch, domid_t domain_id, uint32_t msr,
+                          bool enable)
 {
     DECLARE_DOMCTL;
 
@@ -96,7 +96,7 @@  int xc_monitor_mov_to_msr(xc_interface *xch, domid_t domain_id, bool enable,
     domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
                                     : XEN_DOMCTL_MONITOR_OP_DISABLE;
     domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR;
-    domctl.u.monitor_op.u.mov_to_msr.extended_capture = extended_capture;
+    domctl.u.monitor_op.u.mov_to_msr.msr = msr;
 
     return do_domctl(xch, &domctl);
 }
diff --git a/xen/arch/x86/hvm/event.c b/xen/arch/x86/hvm/event.c
index 56c5514..015910b 100644
--- a/xen/arch/x86/hvm/event.c
+++ b/xen/arch/x86/hvm/event.c
@@ -57,9 +57,8 @@  bool_t hvm_event_cr(unsigned int index, unsigned long value, unsigned long old)
 void hvm_event_msr(unsigned int msr, uint64_t value)
 {
     struct vcpu *curr = current;
-    struct arch_domain *ad = &curr->domain->arch;
 
-    if ( ad->monitor.mov_to_msr_enabled )
+    if ( arch_monitor_is_msr_enabled(curr->domain, msr) )
     {
         vm_event_request_t req = {
             .reason = VM_EVENT_REASON_MOV_TO_MSR,
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index f24126d..7c2a98c 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3695,7 +3695,6 @@  int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
     bool_t mtrr;
     unsigned int edx, index;
     int ret = X86EMUL_OKAY;
-    struct arch_domain *currad = &current->domain->arch;
 
     HVMTRACE_3D(MSR_WRITE, msr,
                (uint32_t)msr_content, (uint32_t)(msr_content >> 32));
@@ -3703,7 +3702,7 @@  int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
     hvm_cpuid(1, NULL, NULL, NULL, &edx);
     mtrr = !!(edx & cpufeat_mask(X86_FEATURE_MTRR));
 
-    if ( may_defer && unlikely(currad->monitor.mov_to_msr_enabled) )
+    if ( may_defer && unlikely(arch_monitor_is_msr_enabled(v->domain, msr)) )
     {
         ASSERT(v->arch.vm_event);
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 8284281..f92f4b8 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -37,6 +37,7 @@ 
 #include <asm/hvm/vmx/vvmx.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/flushtlb.h>
+#include <asm/monitor.h>
 #include <asm/shadow.h>
 #include <asm/tboot.h>
 #include <asm/apic.h>
@@ -108,18 +109,6 @@  u64 vmx_ept_vpid_cap __read_mostly;
 u64 vmx_vmfunc __read_mostly;
 bool_t vmx_virt_exception __read_mostly;
 
-const u32 vmx_introspection_force_enabled_msrs[] = {
-    MSR_IA32_SYSENTER_EIP,
-    MSR_IA32_SYSENTER_ESP,
-    MSR_IA32_SYSENTER_CS,
-    MSR_IA32_MC0_CTL,
-    MSR_STAR,
-    MSR_LSTAR
-};
-
-const unsigned int vmx_introspection_force_enabled_msrs_size =
-    ARRAY_SIZE(vmx_introspection_force_enabled_msrs);
-
 static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
 static DEFINE_PER_CPU(paddr_t, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
@@ -810,17 +799,8 @@  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
     if ( msr_bitmap == NULL )
         return;
 
-    if ( unlikely(d->arch.monitor.mov_to_msr_enabled &&
-                  d->arch.monitor.mov_to_msr_extended) &&
-         vm_event_check_ring(&d->vm_event->monitor) )
-    {
-        unsigned int i;
-
-        /* Filter out MSR-s needed for memory introspection */
-        for ( i = 0; i < vmx_introspection_force_enabled_msrs_size; i++ )
-            if ( msr == vmx_introspection_force_enabled_msrs[i] )
-                return;
-    }
+    if ( unlikely(arch_monitor_is_msr_enabled(d, msr)) )
+        return;
 
     /*
      * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index bc4410f..9135441 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1958,16 +1958,12 @@  void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
         *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
 }
 
-static void vmx_enable_msr_exit_interception(struct domain *d)
+static void vmx_enable_msr_interception(struct domain *d, uint32_t msr)
 {
     struct vcpu *v;
-    unsigned int i;
 
-    /* Enable interception for MSRs needed for memory introspection. */
     for_each_vcpu ( d, v )
-        for ( i = 0; i < vmx_introspection_force_enabled_msrs_size; i++ )
-            vmx_enable_intercept_for_msr(v, vmx_introspection_force_enabled_msrs[i],
-                                         MSR_TYPE_W);
+        vmx_enable_intercept_for_msr(v, msr, MSR_TYPE_W);
 }
 
 static bool_t vmx_is_singlestep_supported(void)
@@ -2166,7 +2162,7 @@  static struct hvm_function_table __initdata vmx_function_table = {
     .handle_eoi           = vmx_handle_eoi,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
-    .enable_msr_exit_interception = vmx_enable_msr_exit_interception,
+    .enable_msr_interception = vmx_enable_msr_interception,
     .is_singlestep_supported = vmx_is_singlestep_supported,
     .set_mode = vmx_set_mode,
     .altp2m_vcpu_update_p2m = vmx_vcpu_update_eptp,
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index 1fec412..4c96968 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -22,6 +22,58 @@ 
 #include <asm/monitor.h>
 #include <public/vm_event.h>
 
+/* Mark msr as monitored and intercept guest writes to it (0 or -EINVAL). */
+static int arch_monitor_enable_msr(struct domain *d, u32 msr)
+{
+    if ( !d->arch.monitor_msr_bitmap )
+        return -EINVAL;
+
+    /* Only the bitmap index is masked; msr itself must stay intact below. */
+    if ( msr <= 0x1fff )
+        set_bit(msr, d->arch.monitor_msr_bitmap + 0x000/BYTES_PER_LONG);
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        set_bit(msr & 0x1fff,
+                d->arch.monitor_msr_bitmap + 0x400/BYTES_PER_LONG);
+
+    hvm_enable_msr_interception(d, msr);
+
+    return 0;
+}
+
+static int arch_monitor_disable_msr(struct domain *d, u32 msr)
+{
+    unsigned long *bitmap = d->arch.monitor_msr_bitmap;
+
+    if ( !bitmap )
+        return -EINVAL;
+
+    /* Clear the tracking bit; MSRs outside both ranges are left untouched. */
+    if ( msr <= 0x1fff )
+        clear_bit(msr, bitmap + 0x000/BYTES_PER_LONG);
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        clear_bit(msr & 0x1fff, bitmap + 0x400/BYTES_PER_LONG);
+
+    return 0;
+}
+
+bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr)
+{
+    const unsigned long *bitmap = d->arch.monitor_msr_bitmap;
+
+    if ( !bitmap )
+        return 0;
+
+    /* MSRs up to 0x1fff are tracked from the start of the bitmap. */
+    if ( msr <= 0x1fff )
+        return test_bit(msr, bitmap + 0x000/BYTES_PER_LONG);
+
+    /* MSRs 0xc0000000 - 0xc0001fff are tracked 0x400/BYTES_PER_LONG longs in. */
+    if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        return test_bit(msr & 0x1fff, bitmap + 0x400/BYTES_PER_LONG);
+
+    return 0;
+}
+
 int arch_monitor_domctl_event(struct domain *d,
                               struct xen_domctl_monitor_op *mop)
 {
@@ -77,25 +129,28 @@  int arch_monitor_domctl_event(struct domain *d,
 
     case XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR:
     {
-        bool_t old_status = ad->monitor.mov_to_msr_enabled;
+        bool_t old_status;
+        int rc;
+        u32 msr = mop->u.mov_to_msr.msr;
 
-        if ( unlikely(old_status == requested_status) )
-            return -EEXIST;
+        domain_pause(d);
 
-        if ( requested_status && mop->u.mov_to_msr.extended_capture &&
-             !hvm_enable_msr_exit_interception(d) )
-            return -EOPNOTSUPP;
+        old_status = arch_monitor_is_msr_enabled(d, msr);
 
-        domain_pause(d);
+        if ( unlikely(old_status == requested_status) )
+        {
+            domain_unpause(d);
+            return -EEXIST;
+        }
 
-        if ( requested_status && mop->u.mov_to_msr.extended_capture )
-            ad->monitor.mov_to_msr_extended = 1;
+        if ( requested_status )
+            rc = arch_monitor_enable_msr(d, msr);
         else
-            ad->monitor.mov_to_msr_extended = 0;
+            rc = arch_monitor_disable_msr(d, msr);
 
-        ad->monitor.mov_to_msr_enabled = requested_status;
         domain_unpause(d);
-        break;
+
+        return rc;
     }
 
     case XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP:
diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
index 5635603..9b4267e 100644
--- a/xen/arch/x86/vm_event.c
+++ b/xen/arch/x86/vm_event.c
@@ -27,6 +27,13 @@  int vm_event_init_domain(struct domain *d)
 {
     struct vcpu *v;
 
+    d->arch.monitor_msr_bitmap = alloc_xenheap_page();
+
+    if ( !d->arch.monitor_msr_bitmap )
+        return -ENOMEM;
+
+    memset(d->arch.monitor_msr_bitmap, 0, PAGE_SIZE);
+
     for_each_vcpu ( d, v )
     {
         if ( v->arch.vm_event )
@@ -55,6 +62,9 @@  void vm_event_cleanup_domain(struct domain *d)
         v->arch.vm_event = NULL;
     }
 
+    free_xenheap_page(d->arch.monitor_msr_bitmap);
+    d->arch.monitor_msr_bitmap = NULL;
+
     d->arch.mem_access_emulate_each_rep = 0;
     memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
     memset(&d->monitor, 0, sizeof(d->monitor));
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index d393ed2..d8d91c2 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -398,12 +398,12 @@  struct arch_domain
         unsigned int write_ctrlreg_enabled       : 4;
         unsigned int write_ctrlreg_sync          : 4;
         unsigned int write_ctrlreg_onchangeonly  : 4;
-        unsigned int mov_to_msr_enabled          : 1;
-        unsigned int mov_to_msr_extended         : 1;
         unsigned int singlestep_enabled          : 1;
         unsigned int software_breakpoint_enabled : 1;
     } monitor;
 
+    unsigned long *monitor_msr_bitmap;
+
     /* Mem_access emulation control */
     bool_t mem_access_emulate_each_rep;
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 7b7ff3f..9d1c0ef 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -211,7 +211,7 @@  struct hvm_function_table {
                                   uint32_t *eax, uint32_t *ebx,
                                   uint32_t *ecx, uint32_t *edx);
 
-    void (*enable_msr_exit_interception)(struct domain *d);
+    void (*enable_msr_interception)(struct domain *d, uint32_t msr);
     bool_t (*is_singlestep_supported)(void);
     int (*set_mode)(struct vcpu *v, int mode);
 
@@ -565,11 +565,11 @@  static inline enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v)
     return hvm_funcs.nhvm_intr_blocked(v);
 }
 
-static inline bool_t hvm_enable_msr_exit_interception(struct domain *d)
+static inline bool_t hvm_enable_msr_interception(struct domain *d, uint32_t msr)
 {
-    if ( hvm_funcs.enable_msr_exit_interception )
+    if ( hvm_funcs.enable_msr_interception )
     {
-        hvm_funcs.enable_msr_exit_interception(d);
+        hvm_funcs.enable_msr_interception(d, msr);
         return 1;
     }
 
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index b54f52f..7bf5326 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -562,13 +562,6 @@  enum vmcs_field {
     HOST_RIP                        = 0x00006c16,
 };
 
-/*
- * A set of MSR-s that need to be enabled for memory introspection
- * to work.
- */
-extern const u32 vmx_introspection_force_enabled_msrs[];
-extern const unsigned int vmx_introspection_force_enabled_msrs_size;
-
 #define VMCS_VPID_WIDTH 16
 
 #define MSR_TYPE_R 1
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index 0954b59..74e5b1b 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -50,4 +50,6 @@  int arch_monitor_domctl_op(struct domain *d, struct xen_domctl_monitor_op *mop)
 int arch_monitor_domctl_event(struct domain *d,
                               struct xen_domctl_monitor_op *mop);
 
+bool_t arch_monitor_is_msr_enabled(const struct domain *d, u32 msr);
+
 #endif /* __ASM_X86_MONITOR_H__ */
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 2457698..875c09a 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1107,8 +1107,7 @@  struct xen_domctl_monitor_op {
         } mov_to_cr;
 
         struct {
-            /* Enable the capture of an extended set of MSRs */
-            uint8_t extended_capture;
+            uint32_t msr;
         } mov_to_msr;
 
         struct {