diff mbox series

[v5,3/3] i386: Add notify VM exit support

Message ID 20220817020845.21855-4-chenyi.qiang@intel.com (mailing list archive)
State New, archived
Headers show
Series Enable notify VM exit | expand

Commit Message

Chenyi Qiang Aug. 17, 2022, 2:08 a.m. UTC
There are cases that malicious virtual machine can cause CPU stuck (due
to event windows don't open up), e.g., infinite loop in microcode when
nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and
IRQ) can be delivered. It leads the CPU to be unavailable to host or
other VMs. Notify VM exit is introduced to mitigate such kind of
attacks, which will generate a VM exit if no event window occurs in VM
non-root mode for a specified amount of time (notify window).

A new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT is exposed to user space
so that the user can query the capability and set the expected notify
window when creating VMs. The format of the argument when enabling this
capability is as follows:
  Bit 63:32 - notify window specified in qemu command
  Bit 31:0  - some flags (e.g. KVM_X86_NOTIFY_VMEXIT_ENABLED is set to
              enable the feature.)

Because there are some concerns, e.g. a notify VM exit may happen with
VM_CONTEXT_INVALID set in exit qualification (no cases are anticipated
that would set this bit), which means VM context is corrupted. To avoid
the false positive and a well-behaved guest gets killed, make this
feature disabled by default. Users can enable the feature by a new
machine property:
    qemu -machine notify_vmexit=on,notify_window=0 ...

A new KVM exit reason KVM_EXIT_NOTIFY is defined for notify VM exit. If
it happens with VM_INVALID_CONTEXT, hypervisor exits to user space to
inform the fatal case. Then user space can inject a SHUTDOWN event to
the target vcpu. This is implemented by injecting a sythesized triple
fault event.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 hw/i386/x86.c         | 45 +++++++++++++++++++++++++++++++++++++++++++
 include/hw/i386/x86.h |  5 +++++
 target/i386/kvm/kvm.c | 28 +++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)

Comments

Chenyi Qiang Sept. 5, 2022, 1:05 a.m. UTC | #1
Hi Paolo,

Any comment on patch 2-3?

Thanks
Chenyi

On 8/17/2022 10:08 AM, Chenyi Qiang wrote:
> There are cases that malicious virtual machine can cause CPU stuck (due
> to event windows don't open up), e.g., infinite loop in microcode when
> nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and
> IRQ) can be delivered. It leads the CPU to be unavailable to host or
> other VMs. Notify VM exit is introduced to mitigate such kind of
> attacks, which will generate a VM exit if no event window occurs in VM
> non-root mode for a specified amount of time (notify window).
> 
> A new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT is exposed to user space
> so that the user can query the capability and set the expected notify
> window when creating VMs. The format of the argument when enabling this
> capability is as follows:
>    Bit 63:32 - notify window specified in qemu command
>    Bit 31:0  - some flags (e.g. KVM_X86_NOTIFY_VMEXIT_ENABLED is set to
>                enable the feature.)
> 
> Because there are some concerns, e.g. a notify VM exit may happen with
> VM_CONTEXT_INVALID set in exit qualification (no cases are anticipated
> that would set this bit), which means VM context is corrupted. To avoid
> the false positive and a well-behaved guest gets killed, make this
> feature disabled by default. Users can enable the feature by a new
> machine property:
>      qemu -machine notify_vmexit=on,notify_window=0 ...
> 
> A new KVM exit reason KVM_EXIT_NOTIFY is defined for notify VM exit. If
> it happens with VM_INVALID_CONTEXT, hypervisor exits to user space to
> inform the fatal case. Then user space can inject a SHUTDOWN event to
> the target vcpu. This is implemented by injecting a sythesized triple
> fault event.
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
>   hw/i386/x86.c         | 45 +++++++++++++++++++++++++++++++++++++++++++
>   include/hw/i386/x86.h |  5 +++++
>   target/i386/kvm/kvm.c | 28 +++++++++++++++++++++++++++
>   3 files changed, 78 insertions(+)
> 
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 050eedc0c8..1eccbd3deb 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -1379,6 +1379,37 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
>       qapi_free_SgxEPCList(list);
>   }
>   
> +static bool x86_machine_get_notify_vmexit(Object *obj, Error **errp)
> +{
> +    X86MachineState *x86ms = X86_MACHINE(obj);
> +
> +    return x86ms->notify_vmexit;
> +}
> +
> +static void x86_machine_set_notify_vmexit(Object *obj, bool value, Error **errp)
> +{
> +    X86MachineState *x86ms = X86_MACHINE(obj);
> +
> +    x86ms->notify_vmexit = value;
> +}
> +
> +static void x86_machine_get_notify_window(Object *obj, Visitor *v,
> +                                const char *name, void *opaque, Error **errp)
> +{
> +    X86MachineState *x86ms = X86_MACHINE(obj);
> +    uint32_t notify_window = x86ms->notify_window;
> +
> +    visit_type_uint32(v, name, &notify_window, errp);
> +}
> +
> +static void x86_machine_set_notify_window(Object *obj, Visitor *v,
> +                               const char *name, void *opaque, Error **errp)
> +{
> +    X86MachineState *x86ms = X86_MACHINE(obj);
> +
> +    visit_type_uint32(v, name, &x86ms->notify_window, errp);
> +}
> +
>   static void x86_machine_initfn(Object *obj)
>   {
>       X86MachineState *x86ms = X86_MACHINE(obj);
> @@ -1392,6 +1423,8 @@ static void x86_machine_initfn(Object *obj)
>       x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
>       x86ms->bus_lock_ratelimit = 0;
>       x86ms->above_4g_mem_start = 4 * GiB;
> +    x86ms->notify_vmexit = false;
> +    x86ms->notify_window = 0;
>   }
>   
>   static void x86_machine_class_init(ObjectClass *oc, void *data)
> @@ -1461,6 +1494,18 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
>           NULL, NULL);
>       object_class_property_set_description(oc, "sgx-epc",
>           "SGX EPC device");
> +
> +    object_class_property_add(oc, X86_MACHINE_NOTIFY_WINDOW, "uint32_t",
> +                              x86_machine_get_notify_window,
> +                              x86_machine_set_notify_window, NULL, NULL);
> +    object_class_property_set_description(oc, X86_MACHINE_NOTIFY_WINDOW,
> +            "Set the notify window required by notify VM exit");
> +
> +    object_class_property_add_bool(oc, X86_MACHINE_NOTIFY_VMEXIT,
> +                                   x86_machine_get_notify_vmexit,
> +                                   x86_machine_set_notify_vmexit);
> +    object_class_property_set_description(oc, X86_MACHINE_NOTIFY_VMEXIT,
> +            "Enable notify VM exit");
>   }
>   
>   static const TypeInfo x86_machine_info = {
> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
> index 62fa5774f8..5707329fa7 100644
> --- a/include/hw/i386/x86.h
> +++ b/include/hw/i386/x86.h
> @@ -85,6 +85,9 @@ struct X86MachineState {
>        * which means no limitation on the guest's bus locks.
>        */
>       uint64_t bus_lock_ratelimit;
> +
> +    bool notify_vmexit;
> +    uint32_t notify_window;
>   };
>   
>   #define X86_MACHINE_SMM              "smm"
> @@ -94,6 +97,8 @@ struct X86MachineState {
>   #define X86_MACHINE_OEM_ID           "x-oem-id"
>   #define X86_MACHINE_OEM_TABLE_ID     "x-oem-table-id"
>   #define X86_MACHINE_BUS_LOCK_RATELIMIT  "bus-lock-ratelimit"
> +#define X86_MACHINE_NOTIFY_VMEXIT     "notify-vmexit"
> +#define X86_MACHINE_NOTIFY_WINDOW     "notify-window"
>   
>   #define TYPE_X86_MACHINE   MACHINE_TYPE_NAME("x86")
>   OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE)
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index cb88ba4a00..85f11dd8d6 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -2580,6 +2580,20 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>               ratelimit_set_speed(&bus_lock_ratelimit_ctrl,
>                                   x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME);
>           }
> +
> +        if (x86ms->notify_vmexit &&
> +            kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) {
> +            uint64_t notify_window_flags = ((uint64_t)x86ms->notify_window << 32) |
> +                                           KVM_X86_NOTIFY_VMEXIT_ENABLED |
> +                                           KVM_X86_NOTIFY_VMEXIT_USER;
> +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0,
> +                                    notify_window_flags);
> +            if (ret < 0) {
> +                error_report("kvm: Failed to enable notify vmexit cap: %s",
> +                             strerror(-ret));
> +                return ret;
> +            }
> +        }
>       }
>   
>       return 0;
> @@ -5117,6 +5131,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>       X86CPU *cpu = X86_CPU(cs);
>       uint64_t code;
>       int ret;
> +    struct kvm_vcpu_events events = {};
>   
>       switch (run->exit_reason) {
>       case KVM_EXIT_HLT:
> @@ -5172,6 +5187,19 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>           /* already handled in kvm_arch_post_run */
>           ret = 0;
>           break;
> +    case KVM_EXIT_NOTIFY:
> +        ret = 0;
> +        if (run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID) {
> +            warn_report("KVM: invalid context due to notify vmexit");
> +            if (has_triple_fault_event) {
> +                events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
> +                events.triple_fault.pending = true;
> +                ret = kvm_vcpu_ioctl(cs, KVM_SET_VCPU_EVENTS, &events);
> +            } else {
> +                ret = -1;
> +            }
> +        }
> +        break;
>       default:
>           fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>           ret = -1;
Peter Xu Sept. 9, 2022, 4:25 p.m. UTC | #2
On Wed, Aug 17, 2022 at 10:08:45AM +0800, Chenyi Qiang wrote:
> There are cases that malicious virtual machine can cause CPU stuck (due
> to event windows don't open up), e.g., infinite loop in microcode when
> nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and
> IRQ) can be delivered. It leads the CPU to be unavailable to host or
> other VMs. Notify VM exit is introduced to mitigate such kind of
> attacks, which will generate a VM exit if no event window occurs in VM
> non-root mode for a specified amount of time (notify window).
> 
> A new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT is exposed to user space
> so that the user can query the capability and set the expected notify
> window when creating VMs. The format of the argument when enabling this
> capability is as follows:
>   Bit 63:32 - notify window specified in qemu command
>   Bit 31:0  - some flags (e.g. KVM_X86_NOTIFY_VMEXIT_ENABLED is set to
>               enable the feature.)
> 
> Because there are some concerns, e.g. a notify VM exit may happen with
> VM_CONTEXT_INVALID set in exit qualification (no cases are anticipated
> that would set this bit), which means VM context is corrupted. To avoid
> the false positive and a well-behaved guest gets killed, make this
> feature disabled by default. Users can enable the feature by a new
> machine property:
>     qemu -machine notify_vmexit=on,notify_window=0 ...

The patch looks sane to me; I only read the KVM interface, though.  Worth
add a section to qemu-options.hx?  It'll also be worthwhile to mention the
valid range of notify_window and meaning of zero (IIUC that's also a valid
input, just use the hardware default window size).

Thanks,

> 
> A new KVM exit reason KVM_EXIT_NOTIFY is defined for notify VM exit. If
> it happens with VM_INVALID_CONTEXT, hypervisor exits to user space to
> inform the fatal case. Then user space can inject a SHUTDOWN event to
> the target vcpu. This is implemented by injecting a sythesized triple
> fault event.
Chenyi Qiang Sept. 13, 2022, 1:13 a.m. UTC | #3
On 9/10/2022 12:25 AM, Peter Xu wrote:
> On Wed, Aug 17, 2022 at 10:08:45AM +0800, Chenyi Qiang wrote:
>> There are cases that malicious virtual machine can cause CPU stuck (due
>> to event windows don't open up), e.g., infinite loop in microcode when
>> nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and
>> IRQ) can be delivered. It leads the CPU to be unavailable to host or
>> other VMs. Notify VM exit is introduced to mitigate such kind of
>> attacks, which will generate a VM exit if no event window occurs in VM
>> non-root mode for a specified amount of time (notify window).
>>
>> A new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT is exposed to user space
>> so that the user can query the capability and set the expected notify
>> window when creating VMs. The format of the argument when enabling this
>> capability is as follows:
>>    Bit 63:32 - notify window specified in qemu command
>>    Bit 31:0  - some flags (e.g. KVM_X86_NOTIFY_VMEXIT_ENABLED is set to
>>                enable the feature.)
>>
>> Because there are some concerns, e.g. a notify VM exit may happen with
>> VM_CONTEXT_INVALID set in exit qualification (no cases are anticipated
>> that would set this bit), which means VM context is corrupted. To avoid
>> the false positive and a well-behaved guest gets killed, make this
>> feature disabled by default. Users can enable the feature by a new
>> machine property:
>>      qemu -machine notify_vmexit=on,notify_window=0 ...
> 
> The patch looks sane to me; I only read the KVM interface, though.  Worth
> add a section to qemu-options.hx?  It'll also be worthwhile to mention the
> valid range of notify_window and meaning of zero (IIUC that's also a valid
> input, just use the hardware default window size).
> 

Thanks Peter for your review.

I'll add some doc in qemu-option.hx and also the commit message about 
the valid range in next version.

> Thanks,
> 
>>
>> A new KVM exit reason KVM_EXIT_NOTIFY is defined for notify VM exit. If
>> it happens with VM_INVALID_CONTEXT, hypervisor exits to user space to
>> inform the fatal case. Then user space can inject a SHUTDOWN event to
>> the target vcpu. This is implemented by injecting a sythesized triple
>> fault event.
>
diff mbox series

Patch

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 050eedc0c8..1eccbd3deb 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1379,6 +1379,37 @@  static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
     qapi_free_SgxEPCList(list);
 }
 
+static bool x86_machine_get_notify_vmexit(Object *obj, Error **errp)
+{
+    X86MachineState *x86ms = X86_MACHINE(obj);
+
+    return x86ms->notify_vmexit;
+}
+
+static void x86_machine_set_notify_vmexit(Object *obj, bool value, Error **errp)
+{
+    X86MachineState *x86ms = X86_MACHINE(obj);
+
+    x86ms->notify_vmexit = value;
+}
+
+static void x86_machine_get_notify_window(Object *obj, Visitor *v,
+                                const char *name, void *opaque, Error **errp)
+{
+    X86MachineState *x86ms = X86_MACHINE(obj);
+    uint32_t notify_window = x86ms->notify_window;
+
+    visit_type_uint32(v, name, &notify_window, errp);
+}
+
+static void x86_machine_set_notify_window(Object *obj, Visitor *v,
+                               const char *name, void *opaque, Error **errp)
+{
+    X86MachineState *x86ms = X86_MACHINE(obj);
+
+    visit_type_uint32(v, name, &x86ms->notify_window, errp);
+}
+
 static void x86_machine_initfn(Object *obj)
 {
     X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1392,6 +1423,8 @@  static void x86_machine_initfn(Object *obj)
     x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
     x86ms->bus_lock_ratelimit = 0;
     x86ms->above_4g_mem_start = 4 * GiB;
+    x86ms->notify_vmexit = false;
+    x86ms->notify_window = 0;
 }
 
 static void x86_machine_class_init(ObjectClass *oc, void *data)
@@ -1461,6 +1494,18 @@  static void x86_machine_class_init(ObjectClass *oc, void *data)
         NULL, NULL);
     object_class_property_set_description(oc, "sgx-epc",
         "SGX EPC device");
+
+    object_class_property_add(oc, X86_MACHINE_NOTIFY_WINDOW, "uint32_t",
+                              x86_machine_get_notify_window,
+                              x86_machine_set_notify_window, NULL, NULL);
+    object_class_property_set_description(oc, X86_MACHINE_NOTIFY_WINDOW,
+            "Set the notify window required by notify VM exit");
+
+    object_class_property_add_bool(oc, X86_MACHINE_NOTIFY_VMEXIT,
+                                   x86_machine_get_notify_vmexit,
+                                   x86_machine_set_notify_vmexit);
+    object_class_property_set_description(oc, X86_MACHINE_NOTIFY_VMEXIT,
+            "Enable notify VM exit");
 }
 
 static const TypeInfo x86_machine_info = {
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 62fa5774f8..5707329fa7 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -85,6 +85,9 @@  struct X86MachineState {
      * which means no limitation on the guest's bus locks.
      */
     uint64_t bus_lock_ratelimit;
+
+    bool notify_vmexit;
+    uint32_t notify_window;
 };
 
 #define X86_MACHINE_SMM              "smm"
@@ -94,6 +97,8 @@  struct X86MachineState {
 #define X86_MACHINE_OEM_ID           "x-oem-id"
 #define X86_MACHINE_OEM_TABLE_ID     "x-oem-table-id"
 #define X86_MACHINE_BUS_LOCK_RATELIMIT  "bus-lock-ratelimit"
+#define X86_MACHINE_NOTIFY_VMEXIT     "notify-vmexit"
+#define X86_MACHINE_NOTIFY_WINDOW     "notify-window"
 
 #define TYPE_X86_MACHINE   MACHINE_TYPE_NAME("x86")
 OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index cb88ba4a00..85f11dd8d6 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2580,6 +2580,20 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
             ratelimit_set_speed(&bus_lock_ratelimit_ctrl,
                                 x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME);
         }
+
+        if (x86ms->notify_vmexit &&
+            kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) {
+            uint64_t notify_window_flags = ((uint64_t)x86ms->notify_window << 32) |
+                                           KVM_X86_NOTIFY_VMEXIT_ENABLED |
+                                           KVM_X86_NOTIFY_VMEXIT_USER;
+            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0,
+                                    notify_window_flags);
+            if (ret < 0) {
+                error_report("kvm: Failed to enable notify vmexit cap: %s",
+                             strerror(-ret));
+                return ret;
+            }
+        }
     }
 
     return 0;
@@ -5117,6 +5131,7 @@  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
     X86CPU *cpu = X86_CPU(cs);
     uint64_t code;
     int ret;
+    struct kvm_vcpu_events events = {};
 
     switch (run->exit_reason) {
     case KVM_EXIT_HLT:
@@ -5172,6 +5187,19 @@  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         /* already handled in kvm_arch_post_run */
         ret = 0;
         break;
+    case KVM_EXIT_NOTIFY:
+        ret = 0;
+        if (run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID) {
+            warn_report("KVM: invalid context due to notify vmexit");
+            if (has_triple_fault_event) {
+                events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+                events.triple_fault.pending = true;
+                ret = kvm_vcpu_ioctl(cs, KVM_SET_VCPU_EVENTS, &events);
+            } else {
+                ret = -1;
+            }
+        }
+        break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;