diff mbox series

[v2,for-4.14,3/3] xen/vm_event: Add safe to disable vm_event

Message ID 682dde916f982e2889b2be2263418e9506a82c1e.1590028160.git.tamas@tklengyel.com (mailing list archive)
State New, archived
Headers show
Series vm_event: fix race-condition when disabling monitor events | expand

Commit Message

Tamas K Lengyel May 21, 2020, 2:31 a.m. UTC
Instead of having to repeatedly try to disable vm_events, request a specific
vm_event to be sent when the domain is safe to continue with shutting down
the vm_event interface.

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
---
 xen/arch/x86/hvm/hvm.c            | 38 ++++++++++++++++++++++++++-----
 xen/arch/x86/hvm/monitor.c        | 14 ++++++++++++
 xen/arch/x86/monitor.c            | 13 +++++++++++
 xen/include/asm-x86/domain.h      |  1 +
 xen/include/asm-x86/hvm/monitor.h |  1 +
 xen/include/public/domctl.h       |  2 ++
 xen/include/public/vm_event.h     |  8 +++++++
 7 files changed, 71 insertions(+), 6 deletions(-)

Comments

Roger Pau Monné June 2, 2020, 12:54 p.m. UTC | #1
On Wed, May 20, 2020 at 08:31:54PM -0600, Tamas K Lengyel wrote:
> Instead of having to repeatedly try to disable vm_events,

Why not use a hypercall continuation instead so that this is all
hidden from the caller?

I take it that the current interface requires the user to repeatedly
issue hypercalls in order to disable vm_events until one of those
succeeds?

> request a specific
> vm_event to be sent when the domain is safe to continue with shutting down
> the vm_event interface.
> 
> Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
> ---
>  xen/arch/x86/hvm/hvm.c            | 38 ++++++++++++++++++++++++++-----
>  xen/arch/x86/hvm/monitor.c        | 14 ++++++++++++
>  xen/arch/x86/monitor.c            | 13 +++++++++++
>  xen/include/asm-x86/domain.h      |  1 +
>  xen/include/asm-x86/hvm/monitor.h |  1 +
>  xen/include/public/domctl.h       |  2 ++
>  xen/include/public/vm_event.h     |  8 +++++++
>  7 files changed, 71 insertions(+), 6 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index e6780c685b..fc7e1e2b22 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -563,15 +563,41 @@ void hvm_do_resume(struct vcpu *v)
>          v->arch.hvm.inject_event.vector = HVM_EVENT_VECTOR_UNSET;
>      }
>  
> -    if ( unlikely(v->arch.vm_event) && v->arch.monitor.next_interrupt_enabled )
> +    if ( unlikely(v->arch.vm_event) )
>      {
> -        struct x86_event info;
> +        struct domain *d = v->domain;
> +
> +        if ( v->arch.monitor.next_interrupt_enabled )
> +        {
> +            struct x86_event info;
> +
> +            if ( hvm_get_pending_event(v, &info) )
> +            {
> +                hvm_monitor_interrupt(info.vector, info.type, info.error_code,
> +                                      info.cr2);
> +                v->arch.monitor.next_interrupt_enabled = false;
> +            }
> +        }
>  
> -        if ( hvm_get_pending_event(v, &info) )
> +        if ( d->arch.monitor.safe_to_disable )
>          {
> -            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
> -                                  info.cr2);
> -            v->arch.monitor.next_interrupt_enabled = false;
> +            const struct vcpu *check_vcpu;
> +            bool pending_op = false;
> +
> +            for_each_vcpu ( d, check_vcpu )
> +            {
> +                if ( vm_event_check_pending_op(check_vcpu) )

Don't you need some kind of lock here, since you are poking at another
vCPU which could be modifying any of those bits?

> +                {
> +                    pending_op = true;
> +                    break;
> +                }
> +            }
> +
> +            if ( !pending_op )
> +            {
> +                hvm_monitor_safe_to_disable();
> +                d->arch.monitor.safe_to_disable = false;
> +            }
>          }
>      }
>  }
> diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> index f5d89e71d1..75fd1a4b68 100644
> --- a/xen/arch/x86/hvm/monitor.c
> +++ b/xen/arch/x86/hvm/monitor.c
> @@ -300,6 +300,20 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
>      return monitor_traps(curr, true, &req) >= 0;
>  }
>  
> +void hvm_monitor_safe_to_disable(void)
> +{
> +    struct vcpu *curr = current;
> +    struct arch_domain *ad = &curr->domain->arch;

const

> +    vm_event_request_t req = {};
> +
> +    if ( !ad->monitor.safe_to_disable )
> +        return;

Should this rather be an ASSERT? I don't think you are supposed to
call hvm_monitor_safe_to_disable when the bit is not set?

> +
> +    req.reason = VM_EVENT_REASON_SAFE_TO_DISABLE;

I think you can set the field at definition time.

> +
> +    monitor_traps(curr, 0, &req);
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 1517a97f50..86e0ba2fbc 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -339,6 +339,19 @@ int arch_monitor_domctl_event(struct domain *d,
>          break;
>      }
>  
> +    case XEN_DOMCTL_MONITOR_EVENT_SAFE_TO_DISABLE:
> +    {
> +        bool old_status = ad->monitor.safe_to_disable;
> +
> +        if ( unlikely(old_status == requested_status) )
> +            return -EEXIST;
> +
> +        domain_pause(d);
> +        ad->monitor.safe_to_disable = requested_status;

Maybe I'm missing something, but I don't see any check that other
events are disabled before safe_to_disable is set?

In the same way, you should prevent setting any events when
safe_to_disable is set IMO, likely returning -EBUSY in both cases.

Thanks, Roger.
Tamas K Lengyel June 2, 2020, 1:06 p.m. UTC | #2
On Tue, Jun 2, 2020 at 6:54 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Wed, May 20, 2020 at 08:31:54PM -0600, Tamas K Lengyel wrote:
> > Instead of having to repeatedly try to disable vm_events,
>
> Why not use a hypercall continuation instead so that this is all
> hidden from the caller?
>
> I take it that the current interface requires the user to repeatedly
> issue hypercalls in order to disable vm_events until one of those
> succeeds?

No, it succeeds right away. And then the guest crashes in unique and
unpredictable ways.

>
> > request a specific
> > vm_event to be sent when the domain is safe to continue with shutting down
> > the vm_event interface.
> >
> > Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
> > ---
> >  xen/arch/x86/hvm/hvm.c            | 38 ++++++++++++++++++++++++++-----
> >  xen/arch/x86/hvm/monitor.c        | 14 ++++++++++++
> >  xen/arch/x86/monitor.c            | 13 +++++++++++
> >  xen/include/asm-x86/domain.h      |  1 +
> >  xen/include/asm-x86/hvm/monitor.h |  1 +
> >  xen/include/public/domctl.h       |  2 ++
> >  xen/include/public/vm_event.h     |  8 +++++++
> >  7 files changed, 71 insertions(+), 6 deletions(-)
> >
> > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> > index e6780c685b..fc7e1e2b22 100644
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -563,15 +563,41 @@ void hvm_do_resume(struct vcpu *v)
> >          v->arch.hvm.inject_event.vector = HVM_EVENT_VECTOR_UNSET;
> >      }
> >
> > -    if ( unlikely(v->arch.vm_event) && v->arch.monitor.next_interrupt_enabled )
> > +    if ( unlikely(v->arch.vm_event) )
> >      {
> > -        struct x86_event info;
> > +        struct domain *d = v->domain;
> > +
> > +        if ( v->arch.monitor.next_interrupt_enabled )
> > +        {
> > +            struct x86_event info;
> > +
> > +            if ( hvm_get_pending_event(v, &info) )
> > +            {
> > +                hvm_monitor_interrupt(info.vector, info.type, info.error_code,
> > +                                      info.cr2);
> > +                v->arch.monitor.next_interrupt_enabled = false;
> > +            }
> > +        }
> >
> > -        if ( hvm_get_pending_event(v, &info) )
> > +        if ( d->arch.monitor.safe_to_disable )
> >          {
> > -            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
> > -                                  info.cr2);
> > -            v->arch.monitor.next_interrupt_enabled = false;
> > +            const struct vcpu *check_vcpu;
> > +            bool pending_op = false;
> > +
> > +            for_each_vcpu ( d, check_vcpu )
> > +            {
> > +                if ( vm_event_check_pending_op(check_vcpu) )
>
> Don't you need some kind of lock here, since you are poking at another
> vCPU which could be modifying any of those bits?
>
> > +                {
> > +                    pending_op = true;
> > +                    break;
> > +                }
> > +            }
> > +
> > +            if ( !pending_op )
> > +            {
> > +                hvm_monitor_safe_to_disable();
> > +                d->arch.monitor.safe_to_disable = false;
> > +            }
> >          }
> >      }
> >  }
> > diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> > index f5d89e71d1..75fd1a4b68 100644
> > --- a/xen/arch/x86/hvm/monitor.c
> > +++ b/xen/arch/x86/hvm/monitor.c
> > @@ -300,6 +300,20 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> >      return monitor_traps(curr, true, &req) >= 0;
> >  }
> >
> > +void hvm_monitor_safe_to_disable(void)
> > +{
> > +    struct vcpu *curr = current;
> > +    struct arch_domain *ad = &curr->domain->arch;
>
> const
>
> > +    vm_event_request_t req = {};
> > +
> > +    if ( !ad->monitor.safe_to_disable )
> > +        return;
>
> Should this rather be an ASSERT? I don't think you are supposed to
> call hvm_monitor_safe_to_disable when the bit is not set?
>
> > +
> > +    req.reason = VM_EVENT_REASON_SAFE_TO_DISABLE;
>
> > I think you can set the field at definition time.
>
> > +
> > +    monitor_traps(curr, 0, &req);
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> > index 1517a97f50..86e0ba2fbc 100644
> > --- a/xen/arch/x86/monitor.c
> > +++ b/xen/arch/x86/monitor.c
> > @@ -339,6 +339,19 @@ int arch_monitor_domctl_event(struct domain *d,
> >          break;
> >      }
> >
> > +    case XEN_DOMCTL_MONITOR_EVENT_SAFE_TO_DISABLE:
> > +    {
> > +        bool old_status = ad->monitor.safe_to_disable;
> > +
> > +        if ( unlikely(old_status == requested_status) )
> > +            return -EEXIST;
> > +
> > +        domain_pause(d);
> > +        ad->monitor.safe_to_disable = requested_status;
>
> > Maybe I'm missing something, but I don't see any check that other
> > events are disabled before safe_to_disable is set?
>
> In the same way, you should prevent setting any events when
> safe_to_disable is set IMO, likely returning -EBUSY in both cases.
>
> Thanks, Roger.

Thanks for the feedback again. I won't have the bandwidth to address
these, so I'm dropping this patch. If Bitdefender is inclined to
pick it up later, they are welcome to do so. This is only needed if their
buggy feature is enabled.

Tamas
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index e6780c685b..fc7e1e2b22 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -563,15 +563,41 @@  void hvm_do_resume(struct vcpu *v)
         v->arch.hvm.inject_event.vector = HVM_EVENT_VECTOR_UNSET;
     }
 
-    if ( unlikely(v->arch.vm_event) && v->arch.monitor.next_interrupt_enabled )
+    if ( unlikely(v->arch.vm_event) )
     {
-        struct x86_event info;
+        struct domain *d = v->domain;
+
+        if ( v->arch.monitor.next_interrupt_enabled )
+        {
+            struct x86_event info;
+
+            if ( hvm_get_pending_event(v, &info) )
+            {
+                hvm_monitor_interrupt(info.vector, info.type, info.error_code,
+                                      info.cr2);
+                v->arch.monitor.next_interrupt_enabled = false;
+            }
+        }
 
-        if ( hvm_get_pending_event(v, &info) )
+        if ( d->arch.monitor.safe_to_disable )
         {
-            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
-                                  info.cr2);
-            v->arch.monitor.next_interrupt_enabled = false;
+            const struct vcpu *check_vcpu;
+            bool pending_op = false;
+
+            for_each_vcpu ( d, check_vcpu )
+            {
+                if ( vm_event_check_pending_op(check_vcpu) )
+                {
+                    pending_op = true;
+                    break;
+                }
+            }
+
+            if ( !pending_op )
+            {
+                hvm_monitor_safe_to_disable();
+                d->arch.monitor.safe_to_disable = false;
+            }
         }
     }
 }
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index f5d89e71d1..75fd1a4b68 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -300,6 +300,20 @@  bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
     return monitor_traps(curr, true, &req) >= 0;
 }
 
+void hvm_monitor_safe_to_disable(void)
+{
+    struct vcpu *curr = current;
+    struct arch_domain *ad = &curr->domain->arch;
+    vm_event_request_t req = {};
+
+    if ( !ad->monitor.safe_to_disable )
+        return;
+
+    req.reason = VM_EVENT_REASON_SAFE_TO_DISABLE;
+
+    monitor_traps(curr, 0, &req);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index 1517a97f50..86e0ba2fbc 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -339,6 +339,19 @@  int arch_monitor_domctl_event(struct domain *d,
         break;
     }
 
+    case XEN_DOMCTL_MONITOR_EVENT_SAFE_TO_DISABLE:
+    {
+        bool old_status = ad->monitor.safe_to_disable;
+
+        if ( unlikely(old_status == requested_status) )
+            return -EEXIST;
+
+        domain_pause(d);
+        ad->monitor.safe_to_disable = requested_status;
+        domain_unpause(d);
+        break;
+    }
+
     default:
         /*
          * Should not be reached unless arch_monitor_get_capabilities() is
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index d890ab7a22..948b750c71 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -417,6 +417,7 @@  struct arch_domain
          */
         unsigned int inguest_pagefault_disabled                            : 1;
         unsigned int control_register_values                               : 1;
+        unsigned int safe_to_disable                                       : 1;
         struct monitor_msr_bitmap *msr_bitmap;
         uint64_t write_ctrlreg_mask[4];
     } monitor;
diff --git a/xen/include/asm-x86/hvm/monitor.h b/xen/include/asm-x86/hvm/monitor.h
index 66de24cb75..dbc113a635 100644
--- a/xen/include/asm-x86/hvm/monitor.h
+++ b/xen/include/asm-x86/hvm/monitor.h
@@ -52,6 +52,7 @@  bool hvm_monitor_emul_unimplemented(void);
 
 bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
                            uint16_t kind);
+void hvm_monitor_safe_to_disable(void);
 
 #endif /* __ASM_X86_HVM_MONITOR_H__ */
 
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index cbcd25f12c..247e809a6c 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1040,6 +1040,8 @@  struct xen_domctl_psr_cmt_op {
 #define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
 /* Enabled by default */
 #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT     11
+/* Always async, disables automatically on first event */
+#define XEN_DOMCTL_MONITOR_EVENT_SAFE_TO_DISABLE       12
 
 struct xen_domctl_monitor_op {
     uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index fdd3ad8a30..b66d2a8634 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -159,6 +159,14 @@ 
 #define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
 /* Current instruction is not implemented by the emulator */
 #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
+/*
+ * When shutting down vm_event it may not be immediately safe to complete the
+ * process as some vCPUs may be pending synchronization. This async event
+ * type can be used to receive a notification when it's safe to finish disabling
+ * the vm_event interface. All other event types need to be disabled before
+ * registering to this one.
+ */
+#define VM_EVENT_REASON_SAFE_TO_DISABLE         15
 
 /* Supported values for the vm_event_write_ctrlreg index. */
 #define VM_EVENT_X86_CR0    0