
[RFC] i386: Add ratelimit for bus locks acquired in guest

Message ID 20210317084709.15605-1-chenyi.qiang@intel.com (mailing list archive)
State New, archived

Commit Message

Chenyi Qiang March 17, 2021, 8:47 a.m. UTC
Virtual machines can exploit bus locks to degrade the performance of the
whole system. To address this kind of performance DoS attack, bus lock
VM exit has been introduced in KVM; it reports the bus locks detected in
the guest, which lets userspace enforce throttling policies.

The availability of bus lock VM exit can be detected through the
KVM_CAP_X86_BUS_LOCK_EXIT capability. The returned bitmap contains the
policies supported by KVM. KVM_BUS_LOCK_DETECTION_EXIT is the only
supported policy at present; it indicates that KVM will exit to
userspace to handle the bus locks.
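
For reference, here is a minimal sketch of how a KVM userspace could
query and enable this policy with raw ioctls (assuming a <linux/kvm.h>
that defines KVM_CAP_X86_BUS_LOCK_EXIT; QEMU wraps these ioctls in
kvm_check_extension() and kvm_vm_enable_cap(), as the patch below does):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int enable_bus_lock_exit(int kvm_fd, int vm_fd)
    {
        int policies = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
                             KVM_CAP_X86_BUS_LOCK_EXIT);

        if (policies < 0 || !(policies & KVM_BUS_LOCK_DETECTION_EXIT)) {
            return -1;    /* exit-to-userspace policy not supported */
        }

        struct kvm_enable_cap cap = {
            .cap = KVM_CAP_X86_BUS_LOCK_EXIT,
            .args[0] = KVM_BUS_LOCK_DETECTION_EXIT,
        };
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);    /* 0 on success */
    }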

This patch adds a ratelimit on the bus locks acquired in the guest as a
mitigation policy.

Introduce a new field "bld" to record the rate limit of bus locks in the
target VM. The user can specify it through the "bus-lock-detection"
machine property. In the current implementation, the default value is 0
per second, which means no restriction on bus locks.
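
For illustration only (not part of this patch), the property could be
set on the command line like other x86 machine properties; the machine
type and the limit of 10 bus locks per second below are placeholder
values:

    qemu-system-x86_64 -enable-kvm -machine q35,bus-lock-detection=10 ...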

The ratelimit enforced on data transmission uses a time slice of 100ms
to get smooth output during regular operations in block jobs. For the
ratelimit on bus lock detection, simply set the ratelimit interval to 1s
and restrict the quota of bus lock occurrences per slice to the value of
"bld". A potential alternative is to expose the time slice as a
property, which would give the user more precise control.
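
The effect is roughly the following (a simplified standalone sketch, not
QEMU's include/qemu/ratelimit.h; the SimpleRateLimit and
bus_lock_delay_ns names are made up for illustration): within each 1s
slice, the first "bld" bus locks proceed immediately, and any further
bus lock makes the vCPU thread sleep until the slice ends.

    #include <stdint.h>

    #define SLICE_TIME 1000000000ULL /* ns */

    typedef struct {
        uint64_t slice_end_ns; /* end of the current 1s slice */
        uint64_t quota;        /* allowed bus locks per slice ("bld") */
        uint64_t dispatched;   /* bus locks seen in the current slice */
    } SimpleRateLimit;

    /* Return how long (in ns) to sleep for the bus lock observed at now_ns. */
    static uint64_t bus_lock_delay_ns(SimpleRateLimit *rl, uint64_t now_ns)
    {
        if (now_ns >= rl->slice_end_ns) {
            /* the previous slice is over, start a new one */
            rl->slice_end_ns = now_ns + SLICE_TIME;
            rl->dispatched = 0;
        }
        if (++rl->dispatched <= rl->quota) {
            return 0;                        /* still within the quota */
        }
        return rl->slice_end_ns - now_ns;    /* sleep until the slice ends */
    }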

The details of bus lock VM exit can be found in the spec:
https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 hw/i386/x86.c         |  6 ++++++
 include/hw/i386/x86.h |  7 +++++++
 target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+)

Comments

Marcelo Tosatti March 18, 2021, 5:32 p.m. UTC | #1
On Wed, Mar 17, 2021 at 04:47:09PM +0800, Chenyi Qiang wrote:
> Virtual Machines can exploit bus locks to degrade the performance of
> system. To address this kind of performance DOS attack, bus lock VM exit
> is introduced in KVM and it will report the bus locks detected in guest,
> which can help userspace to enforce throttling policies.

> 
> The availability of bus lock VM exit can be detected through the
> KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential
> policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in
> bitmap is the only supported strategy at present. It indicates that KVM
> will exit to userspace to handle the bus locks.
> 
> This patch adds a ratelimit on the bus locks acquired in guest as a
> mitigation policy.
> 
> Introduce a new field "bld" to record the limited speed of bus locks in
> target VM. The user can specify it through the "bus-lock-detection"
> as a machine property. In current implementation, the default value of
> the speed is 0 per second, which means no restriction on the bus locks.
> 
> Ratelimit enforced in data transmission uses a time slice of 100ms to
> get smooth output during regular operations in block jobs. As for
> ratelimit on bus lock detection, simply set the ratelimit interval to 1s
> and restrict the quota of bus lock occurrence to the value of "bld". A
> potential alternative is to introduce the time slice as a property
> which can help the user achieve more precise control.
> 
> The detail of Bus lock VM exit can be found in spec:
> https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
>  hw/i386/x86.c         |  6 ++++++
>  include/hw/i386/x86.h |  7 +++++++
>  target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 57 insertions(+)
> 
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 7865660e2c..a70a259e97 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -1209,6 +1209,12 @@ static void x86_machine_initfn(Object *obj)
>      x86ms->acpi = ON_OFF_AUTO_AUTO;
>      x86ms->smp_dies = 1;
>      x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
> +    x86ms->bld = 0;
> +
> +    object_property_add_uint64_ptr(obj, "bus-lock-detection",
> +                                   &x86ms->bld, OBJ_PROP_FLAG_READWRITE);
> +    object_property_set_description(obj, "bus-lock-detection",
> +            "Bus lock detection ratelimit");
>  }
>  
>  static void x86_machine_class_init(ObjectClass *oc, void *data)
> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
> index 56080bd1fb..1f0ffbcfb9 100644
> --- a/include/hw/i386/x86.h
> +++ b/include/hw/i386/x86.h
> @@ -72,6 +72,13 @@ struct X86MachineState {
>       * will be translated to MSI messages in the address space.
>       */
>      AddressSpace *ioapic_as;
> +
> +    /*
> +     * ratelimit enforced on detected bus locks, the default value
> +     * is 0 per second
> +     */
> +    uint64_t bld;
> +    RateLimit bld_limit;
>  };
>  
>  #define X86_MACHINE_SMM              "smm"
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index c8d61daf68..724862137d 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -130,6 +130,8 @@ static bool has_msr_mcg_ext_ctl;
>  static struct kvm_cpuid2 *cpuid_cache;
>  static struct kvm_msr_list *kvm_feature_msrs;
>  
> +#define SLICE_TIME 1000000000ULL /* ns */
> +
>  int kvm_has_pit_state2(void)
>  {
>      return has_pit_state2;
> @@ -2267,6 +2269,27 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>          }
>      }
>  
> +    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
> +        X86MachineState *x86ms = X86_MACHINE(ms);
> +
> +        if (x86ms->bld > 0) {
> +            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
> +            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
> +                error_report("kvm: bus lock detection unsupported");
> +                return -ENOTSUP;
> +            }
> +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
> +                                    KVM_BUS_LOCK_DETECTION_EXIT);
> +            if (ret < 0) {
> +                error_report("kvm: Failed to enable bus lock detection cap: %s",
> +                             strerror(-ret));
> +                return ret;
> +            }
> +
> +            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, SLICE_TIME);
> +        }
> +    }
> +
>      return 0;
>  }
>  
> @@ -4221,6 +4244,18 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
>      }
>  }
>  
> +static void kvm_rate_limit_on_bus_lock(void)
> +{
> +    MachineState *ms = MACHINE(qdev_get_machine());
> +    X86MachineState *x86ms = X86_MACHINE(ms);
> +
> +    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 1);
> +
> +    if (delay_ns) {
> +        g_usleep(delay_ns / SCALE_US);
> +    }
> +}

Hi,

I can't see a use case where the throttling is very useful: this will
slow the application down to a halt (if its bus-lock instruction is
being executed frequently).

However, it's very nice to know that it (the bus lock) has happened.

So on a KVM bus lock exit, do you emulate the instruction or just execute
it in the guest (without a VM exit the second time)?

(But maybe there are use cases where the throttling makes sense; the
sleep is performed outside the global mutex lock, so the patch looks good.)
Xiaoyao Li March 19, 2021, 1:23 a.m. UTC | #2
On 3/17/2021 4:47 PM, Chenyi Qiang wrote:
[...]
>   MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
>   {
>       X86CPU *x86_cpu = X86_CPU(cpu);
> @@ -4236,6 +4271,11 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
>       } else {
>           env->eflags &= ~IF_MASK;
>       }
> +    if (run->flags & KVM_RUN_X86_BUS_LOCK) {
> +        kvm_cpu_synchronize_state(cpu);
> +        warn_report("bus lock detected at rip: 0x%lx", env->eip);

Chenyi,

Let's drop the eip here since QEMU has no idea whether it points to the
next instruction or to the exact instruction that acquires the bus lock.

> +        kvm_rate_limit_on_bus_lock();
> +    }
>   
>       /* We need to protect the apic state against concurrent accesses from
>        * different threads in case the userspace irqchip is used. */
> @@ -4594,6 +4634,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>           ioapic_eoi_broadcast(run->eoi.vector);
>           ret = 0;
>           break;
> +    case KVM_EXIT_X86_BUS_LOCK:
> +        /* already handled in kvm_arch_post_run */
> +        ret = 0;
> +        break;
>       default:
>           fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>           ret = -1;
>
Chenyi Qiang March 19, 2021, 2:59 a.m. UTC | #3
Hi Marcelo,

Thank you for your comment.

On 3/19/2021 1:32 AM, Marcelo Tosatti wrote:
> On Wed, Mar 17, 2021 at 04:47:09PM +0800, Chenyi Qiang wrote:
>> Virtual Machines can exploit bus locks to degrade the performance of
>> system. To address this kind of performance DOS attack, bus lock VM exit
>> is introduced in KVM and it will report the bus locks detected in guest,
>> which can help userspace to enforce throttling policies.
> 
>>
>> The availability of bus lock VM exit can be detected through the
>> KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential
>> policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in
>> bitmap is the only supported strategy at present. It indicates that KVM
>> will exit to userspace to handle the bus locks.
>>
>> This patch adds a ratelimit on the bus locks acquired in guest as a
>> mitigation policy.
>>
>> Introduce a new field "bld" to record the limited speed of bus locks in
>> target VM. The user can specify it through the "bus-lock-detection"
>> as a machine property. In current implementation, the default value of
>> the speed is 0 per second, which means no restriction on the bus locks.
>>
>> Ratelimit enforced in data transmission uses a time slice of 100ms to
>> get smooth output during regular operations in block jobs. As for
>> ratelimit on bus lock detection, simply set the ratelimit interval to 1s
>> and restrict the quota of bus lock occurrence to the value of "bld". A
>> potential alternative is to introduce the time slice as a property
>> which can help the user achieve more precise control.
>>
>> The detail of Bus lock VM exit can be found in spec:
>> https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html
>>
>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> ---
>>   hw/i386/x86.c         |  6 ++++++
>>   include/hw/i386/x86.h |  7 +++++++
>>   target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 57 insertions(+)
>>
>> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
>> index 7865660e2c..a70a259e97 100644
>> --- a/hw/i386/x86.c
>> +++ b/hw/i386/x86.c
>> @@ -1209,6 +1209,12 @@ static void x86_machine_initfn(Object *obj)
>>       x86ms->acpi = ON_OFF_AUTO_AUTO;
>>       x86ms->smp_dies = 1;
>>       x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
>> +    x86ms->bld = 0;
>> +
>> +    object_property_add_uint64_ptr(obj, "bus-lock-detection",
>> +                                   &x86ms->bld, OBJ_PROP_FLAG_READWRITE);
>> +    object_property_set_description(obj, "bus-lock-detection",
>> +            "Bus lock detection ratelimit");
>>   }
>>   
>>   static void x86_machine_class_init(ObjectClass *oc, void *data)
>> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
>> index 56080bd1fb..1f0ffbcfb9 100644
>> --- a/include/hw/i386/x86.h
>> +++ b/include/hw/i386/x86.h
>> @@ -72,6 +72,13 @@ struct X86MachineState {
>>        * will be translated to MSI messages in the address space.
>>        */
>>       AddressSpace *ioapic_as;
>> +
>> +    /*
>> +     * ratelimit enforced on detected bus locks, the default value
>> +     * is 0 per second
>> +     */
>> +    uint64_t bld;
>> +    RateLimit bld_limit;
>>   };
>>   
>>   #define X86_MACHINE_SMM              "smm"
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index c8d61daf68..724862137d 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -130,6 +130,8 @@ static bool has_msr_mcg_ext_ctl;
>>   static struct kvm_cpuid2 *cpuid_cache;
>>   static struct kvm_msr_list *kvm_feature_msrs;
>>   
>> +#define SLICE_TIME 1000000000ULL /* ns */
>> +
>>   int kvm_has_pit_state2(void)
>>   {
>>       return has_pit_state2;
>> @@ -2267,6 +2269,27 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>           }
>>       }
>>   
>> +    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
>> +        X86MachineState *x86ms = X86_MACHINE(ms);
>> +
>> +        if (x86ms->bld > 0) {
>> +            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
>> +            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
>> +                error_report("kvm: bus lock detection unsupported");
>> +                return -ENOTSUP;
>> +            }
>> +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
>> +                                    KVM_BUS_LOCK_DETECTION_EXIT);
>> +            if (ret < 0) {
>> +                error_report("kvm: Failed to enable bus lock detection cap: %s",
>> +                             strerror(-ret));
>> +                return ret;
>> +            }
>> +
>> +            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, SLICE_TIME);
>> +        }
>> +    }
>> +
>>       return 0;
>>   }
>>   
>> @@ -4221,6 +4244,18 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
>>       }
>>   }
>>   
>> +static void kvm_rate_limit_on_bus_lock(void)
>> +{
>> +    MachineState *ms = MACHINE(qdev_get_machine());
>> +    X86MachineState *x86ms = X86_MACHINE(ms);
>> +
>> +    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 1);
>> +
>> +    if (delay_ns) {
>> +        g_usleep(delay_ns / SCALE_US);
>> +    }
>> +}
> 
> Hi,
> 
> Can't see a use-case where the throttling is very useful: this will
> slowdown the application to a halt (if its bus-lock instruction is
> being called frequently).
> 

The throttling aims to slow down only the target application and avoid
slowing down the whole system (if bus locks are frequent). As you know,
bus locks take more cycles and disrupt performance on other cores.

> However its very nice to know that it (bus lock) has happened.
> 
> So on KVM bus exit do you emulate the instruction or just execute it
> in the guest (without a VM-exit for the second time) ?
> 
Bus lock VM exit is a trap-like VM exit, delivered after the execution
of the instruction that acquires the bus lock. Thus, it can only detect
the occurrence of a bus lock and cannot intercept it. In KVM, we don't
emulate the instruction: the bus lock has already happened and the guest
proceeds to execute.


> (But maybe there are usecases where the throttling makes sense,
> sleep is performed outside global mutex lock, so patch looks good).
>
Chenyi Qiang March 19, 2021, 3:05 a.m. UTC | #4
On 3/19/2021 9:23 AM, Xiaoyao Li wrote:
> On 3/17/2021 4:47 PM, Chenyi Qiang wrote:
> [...]
>>   MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
>>   {
>>       X86CPU *x86_cpu = X86_CPU(cpu);
>> @@ -4236,6 +4271,11 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, 
>> struct kvm_run *run)
>>       } else {
>>           env->eflags &= ~IF_MASK;
>>       }
>> +    if (run->flags & KVM_RUN_X86_BUS_LOCK) {
>> +        kvm_cpu_synchronize_state(cpu);
>> +        warn_report("bus lock detected at rip: 0x%lx", env->eip);
> 
> Chenyi,
> 
> Let's drop the eip here since QEMU has no idea whether it points to the 
> next instruction or the exact instruction acquires bus lock.
> 

Fair enough.

>> +        kvm_rate_limit_on_bus_lock();
>> +    }
>>       /* We need to protect the apic state against concurrent accesses 
>> from
>>        * different threads in case the userspace irqchip is used. */
>> @@ -4594,6 +4634,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct 
>> kvm_run *run)
>>           ioapic_eoi_broadcast(run->eoi.vector);
>>           ret = 0;
>>           break;
>> +    case KVM_EXIT_X86_BUS_LOCK:
>> +        /* already handled in kvm_arch_post_run */
>> +        ret = 0;
>> +        break;
>>       default:
>>           fprintf(stderr, "KVM: unknown exit reason %d\n", 
>> run->exit_reason);
>>           ret = -1;
>>
>
Xiaoyao Li March 19, 2021, 3:16 a.m. UTC | #5
On 3/19/2021 10:59 AM, Chenyi Qiang wrote:
> Hi Marcelo,
> 
> Thank you for your comment.
> 
> On 3/19/2021 1:32 AM, Marcelo Tosatti wrote:
>> On Wed, Mar 17, 2021 at 04:47:09PM +0800, Chenyi Qiang wrote:
>>> Virtual Machines can exploit bus locks to degrade the performance of
>>> system. To address this kind of performance DOS attack, bus lock VM exit
>>> is introduced in KVM and it will report the bus locks detected in guest,
>>> which can help userspace to enforce throttling policies.
>>
>>>
>>> The availability of bus lock VM exit can be detected through the
>>> KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential
>>> policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in
>>> bitmap is the only supported strategy at present. It indicates that KVM
>>> will exit to userspace to handle the bus locks.
>>>
>>> This patch adds a ratelimit on the bus locks acquired in guest as a
>>> mitigation policy.
>>>
>>> Introduce a new field "bld" to record the limited speed of bus locks in
>>> target VM. The user can specify it through the "bus-lock-detection"
>>> as a machine property. In current implementation, the default value of
>>> the speed is 0 per second, which means no restriction on the bus locks.
>>>
>>> Ratelimit enforced in data transmission uses a time slice of 100ms to
>>> get smooth output during regular operations in block jobs. As for
>>> ratelimit on bus lock detection, simply set the ratelimit interval to 1s
>>> and restrict the quota of bus lock occurrence to the value of "bld". A
>>> potential alternative is to introduce the time slice as a property
>>> which can help the user achieve more precise control.
>>>
>>> The detail of Bus lock VM exit can be found in spec:
>>> https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html 
>>>
>>>
>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>> ---
>>>   hw/i386/x86.c         |  6 ++++++
>>>   include/hw/i386/x86.h |  7 +++++++
>>>   target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
>>>   3 files changed, 57 insertions(+)
>>>
>>> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
>>> index 7865660e2c..a70a259e97 100644
>>> --- a/hw/i386/x86.c
>>> +++ b/hw/i386/x86.c
>>> @@ -1209,6 +1209,12 @@ static void x86_machine_initfn(Object *obj)
>>>       x86ms->acpi = ON_OFF_AUTO_AUTO;
>>>       x86ms->smp_dies = 1;
>>>       x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
>>> +    x86ms->bld = 0;
>>> +
>>> +    object_property_add_uint64_ptr(obj, "bus-lock-detection",
>>> +                                   &x86ms->bld, 
>>> OBJ_PROP_FLAG_READWRITE);
>>> +    object_property_set_description(obj, "bus-lock-detection",
>>> +            "Bus lock detection ratelimit");
>>>   }
>>>   static void x86_machine_class_init(ObjectClass *oc, void *data)
>>> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
>>> index 56080bd1fb..1f0ffbcfb9 100644
>>> --- a/include/hw/i386/x86.h
>>> +++ b/include/hw/i386/x86.h
>>> @@ -72,6 +72,13 @@ struct X86MachineState {
>>>        * will be translated to MSI messages in the address space.
>>>        */
>>>       AddressSpace *ioapic_as;
>>> +
>>> +    /*
>>> +     * ratelimit enforced on detected bus locks, the default value
>>> +     * is 0 per second
>>> +     */
>>> +    uint64_t bld;
>>> +    RateLimit bld_limit;
>>>   };
>>>   #define X86_MACHINE_SMM              "smm"
>>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>>> index c8d61daf68..724862137d 100644
>>> --- a/target/i386/kvm/kvm.c
>>> +++ b/target/i386/kvm/kvm.c
>>> @@ -130,6 +130,8 @@ static bool has_msr_mcg_ext_ctl;
>>>   static struct kvm_cpuid2 *cpuid_cache;
>>>   static struct kvm_msr_list *kvm_feature_msrs;
>>> +#define SLICE_TIME 1000000000ULL /* ns */
>>> +
>>>   int kvm_has_pit_state2(void)
>>>   {
>>>       return has_pit_state2;
>>> @@ -2267,6 +2269,27 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>>           }
>>>       }
>>> +    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
>>> +        X86MachineState *x86ms = X86_MACHINE(ms);
>>> +
>>> +        if (x86ms->bld > 0) {
>>> +            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
>>> +            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
>>> +                error_report("kvm: bus lock detection unsupported");
>>> +                return -ENOTSUP;
>>> +            }
>>> +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
>>> +                                    KVM_BUS_LOCK_DETECTION_EXIT);
>>> +            if (ret < 0) {
>>> +                error_report("kvm: Failed to enable bus lock 
>>> detection cap: %s",
>>> +                             strerror(-ret));
>>> +                return ret;
>>> +            }
>>> +
>>> +            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, 
>>> SLICE_TIME);
>>> +        }
>>> +    }
>>> +
>>>       return 0;
>>>   }
>>> @@ -4221,6 +4244,18 @@ void kvm_arch_pre_run(CPUState *cpu, struct 
>>> kvm_run *run)
>>>       }
>>>   }
>>> +static void kvm_rate_limit_on_bus_lock(void)
>>> +{
>>> +    MachineState *ms = MACHINE(qdev_get_machine());
>>> +    X86MachineState *x86ms = X86_MACHINE(ms);
>>> +
>>> +    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 
>>> 1);
>>> +
>>> +    if (delay_ns) {
>>> +        g_usleep(delay_ns / SCALE_US);
>>> +    }
>>> +}
>>
>> Hi,
>>
>> Can't see a use-case where the throttling is very useful: this will
>> slowdown the application to a halt (if its bus-lock instruction is
>> being called frequently).
>>
> 
> The throttling is aimed to only slowdown the target application and 
> avoid slowdown the whole system (if bus-lock is frequent). As you known, 
> bus locks takes more cycles and disrupt performance on other cores.
> 
>> However its very nice to know that it (bus lock) has happened.
>>
>> So on KVM bus exit do you emulate the instruction or just execute it
>> in the guest (without a VM-exit for the second time) ?
>>
> Bus lock VM exit is a trap-like VM exit and delivered following the 
> execution of the instruction acquiring the bus lock. Thus, it can just 
> detect the occurrence of bus locks and can't intercept it. In KVM, we 
> don't emulate the instruction. Bus lock already happens and guest will 
> proceed to execute.
> 

That's half right.

1) When the VM exit is a bus lock VM exit, it's a trap-like VM exit and
the instruction causing the VM exit has already executed, so there is no
need to emulate.

2) When it is some other VM exit but the instruction also acquires a bus
lock, then
   a) if the VM exit is fault-like, the rip points at the faulting
      instruction. After handling and VM entry, it is supposed to trigger
      a bus lock VM exit as in 1);

   b) if the VM exit is trap-like, the rip points at the next
      instruction. Same as 1), no need to emulate.

>> (But maybe there are usecases where the throttling makes sense,
>> sleep is performed outside global mutex lock, so patch looks good).
>>
Marcelo Tosatti March 19, 2021, 12:37 p.m. UTC | #6
On Fri, Mar 19, 2021 at 10:59:20AM +0800, Chenyi Qiang wrote:
> Hi Marcelo,
> 
> Thank you for your comment.
> 
> On 3/19/2021 1:32 AM, Marcelo Tosatti wrote:
> > On Wed, Mar 17, 2021 at 04:47:09PM +0800, Chenyi Qiang wrote:
> > > Virtual Machines can exploit bus locks to degrade the performance of
> > > system. To address this kind of performance DOS attack, bus lock VM exit
> > > is introduced in KVM and it will report the bus locks detected in guest,
> > > which can help userspace to enforce throttling policies.
> > 
> > > 
> > > The availability of bus lock VM exit can be detected through the
> > > KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential
> > > policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in
> > > bitmap is the only supported strategy at present. It indicates that KVM
> > > will exit to userspace to handle the bus locks.
> > > 
> > > This patch adds a ratelimit on the bus locks acquired in guest as a
> > > mitigation policy.
> > > 
> > > Introduce a new field "bld" to record the limited speed of bus locks in
> > > target VM. The user can specify it through the "bus-lock-detection"
> > > as a machine property. In current implementation, the default value of
> > > the speed is 0 per second, which means no restriction on the bus locks.
> > > 
> > > Ratelimit enforced in data transmission uses a time slice of 100ms to
> > > get smooth output during regular operations in block jobs. As for
> > > ratelimit on bus lock detection, simply set the ratelimit interval to 1s
> > > and restrict the quota of bus lock occurrence to the value of "bld". A
> > > potential alternative is to introduce the time slice as a property
> > > which can help the user achieve more precise control.
> > > 
> > > The detail of Bus lock VM exit can be found in spec:
> > > https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html
> > > 
> > > Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> > > ---
> > >   hw/i386/x86.c         |  6 ++++++
> > >   include/hw/i386/x86.h |  7 +++++++
> > >   target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
> > >   3 files changed, 57 insertions(+)
> > > 
> > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> > > index 7865660e2c..a70a259e97 100644
> > > --- a/hw/i386/x86.c
> > > +++ b/hw/i386/x86.c
> > > @@ -1209,6 +1209,12 @@ static void x86_machine_initfn(Object *obj)
> > >       x86ms->acpi = ON_OFF_AUTO_AUTO;
> > >       x86ms->smp_dies = 1;
> > >       x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
> > > +    x86ms->bld = 0;
> > > +
> > > +    object_property_add_uint64_ptr(obj, "bus-lock-detection",
> > > +                                   &x86ms->bld, OBJ_PROP_FLAG_READWRITE);
> > > +    object_property_set_description(obj, "bus-lock-detection",
> > > +            "Bus lock detection ratelimit");
> > >   }
> > >   static void x86_machine_class_init(ObjectClass *oc, void *data)
> > > diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
> > > index 56080bd1fb..1f0ffbcfb9 100644
> > > --- a/include/hw/i386/x86.h
> > > +++ b/include/hw/i386/x86.h
> > > @@ -72,6 +72,13 @@ struct X86MachineState {
> > >        * will be translated to MSI messages in the address space.
> > >        */
> > >       AddressSpace *ioapic_as;
> > > +
> > > +    /*
> > > +     * ratelimit enforced on detected bus locks, the default value
> > > +     * is 0 per second
> > > +     */
> > > +    uint64_t bld;
> > > +    RateLimit bld_limit;
> > >   };
> > >   #define X86_MACHINE_SMM              "smm"
> > > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > > index c8d61daf68..724862137d 100644
> > > --- a/target/i386/kvm/kvm.c
> > > +++ b/target/i386/kvm/kvm.c
> > > @@ -130,6 +130,8 @@ static bool has_msr_mcg_ext_ctl;
> > >   static struct kvm_cpuid2 *cpuid_cache;
> > >   static struct kvm_msr_list *kvm_feature_msrs;
> > > +#define SLICE_TIME 1000000000ULL /* ns */
> > > +
> > >   int kvm_has_pit_state2(void)
> > >   {
> > >       return has_pit_state2;
> > > @@ -2267,6 +2269,27 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> > >           }
> > >       }
> > > +    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
> > > +        X86MachineState *x86ms = X86_MACHINE(ms);
> > > +
> > > +        if (x86ms->bld > 0) {
> > > +            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
> > > +            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
> > > +                error_report("kvm: bus lock detection unsupported");
> > > +                return -ENOTSUP;
> > > +            }
> > > +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
> > > +                                    KVM_BUS_LOCK_DETECTION_EXIT);
> > > +            if (ret < 0) {
> > > +                error_report("kvm: Failed to enable bus lock detection cap: %s",
> > > +                             strerror(-ret));
> > > +                return ret;
> > > +            }
> > > +
> > > +            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, SLICE_TIME);
> > > +        }
> > > +    }
> > > +
> > >       return 0;
> > >   }
> > > @@ -4221,6 +4244,18 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
> > >       }
> > >   }
> > > +static void kvm_rate_limit_on_bus_lock(void)
> > > +{
> > > +    MachineState *ms = MACHINE(qdev_get_machine());
> > > +    X86MachineState *x86ms = X86_MACHINE(ms);
> > > +
> > > +    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 1);
> > > +
> > > +    if (delay_ns) {
> > > +        g_usleep(delay_ns / SCALE_US);
> > > +    }
> > > +}
> > 
> > Hi,
> > 
> > Can't see a use-case where the throttling is very useful: this will
> > slowdown the application to a halt (if its bus-lock instruction is
> > being called frequently).
> > 
> 
> The throttling is aimed to only slowdown the target application and avoid
> slowdown the whole system (if bus-lock is frequent). 

Right.

> As you known, bus locks
> takes more cycles and disrupt performance on other cores.

Right, a test case shows it's pretty bad (cyclictest latency goes from 5us to >
200us).
> 
> > However its very nice to know that it (bus lock) has happened.
> > 
> > So on KVM bus exit do you emulate the instruction or just execute it
> > in the guest (without a VM-exit for the second time) ?
> > 
> Bus lock VM exit is a trap-like VM exit and delivered following the
> execution of the instruction acquiring the bus lock. Thus, it can just
> detect the occurrence of bus locks and can't intercept it. In KVM, we don't
> emulate the instruction. Bus lock already happens and guest will proceed to
> execute.

I see.

Question: it's possible to #AC trap the split lock in a KVM guest, right?

https://lwn.net/Articles/810317/
Chenyi Qiang March 22, 2021, 4:11 a.m. UTC | #7
On 3/19/2021 8:37 PM, Marcelo Tosatti wrote:
> On Fri, Mar 19, 2021 at 10:59:20AM +0800, Chenyi Qiang wrote:
>> Hi Marcelo,
>>
>> Thank you for your comment.
>>
>> On 3/19/2021 1:32 AM, Marcelo Tosatti wrote:
>>> On Wed, Mar 17, 2021 at 04:47:09PM +0800, Chenyi Qiang wrote:
>>>> Virtual Machines can exploit bus locks to degrade the performance of
>>>> system. To address this kind of performance DOS attack, bus lock VM exit
>>>> is introduced in KVM and it will report the bus locks detected in guest,
>>>> which can help userspace to enforce throttling policies.
>>>
>>>>
>>>> The availability of bus lock VM exit can be detected through the
>>>> KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential
>>>> policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in
>>>> bitmap is the only supported strategy at present. It indicates that KVM
>>>> will exit to userspace to handle the bus locks.
>>>>
>>>> This patch adds a ratelimit on the bus locks acquired in guest as a
>>>> mitigation policy.
>>>>
>>>> Introduce a new field "bld" to record the limited speed of bus locks in
>>>> target VM. The user can specify it through the "bus-lock-detection"
>>>> as a machine property. In current implementation, the default value of
>>>> the speed is 0 per second, which means no restriction on the bus locks.
>>>>
>>>> Ratelimit enforced in data transmission uses a time slice of 100ms to
>>>> get smooth output during regular operations in block jobs. As for
>>>> ratelimit on bus lock detection, simply set the ratelimit interval to 1s
>>>> and restrict the quota of bus lock occurrence to the value of "bld". A
>>>> potential alternative is to introduce the time slice as a property
>>>> which can help the user achieve more precise control.
>>>>
>>>> The detail of Bus lock VM exit can be found in spec:
>>>> https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html
>>>>
>>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>>> ---
>>>>    hw/i386/x86.c         |  6 ++++++
>>>>    include/hw/i386/x86.h |  7 +++++++
>>>>    target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
>>>>    3 files changed, 57 insertions(+)
>>>>
>>>> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
>>>> index 7865660e2c..a70a259e97 100644
>>>> --- a/hw/i386/x86.c
>>>> +++ b/hw/i386/x86.c
>>>> @@ -1209,6 +1209,12 @@ static void x86_machine_initfn(Object *obj)
>>>>        x86ms->acpi = ON_OFF_AUTO_AUTO;
>>>>        x86ms->smp_dies = 1;
>>>>        x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
>>>> +    x86ms->bld = 0;
>>>> +
>>>> +    object_property_add_uint64_ptr(obj, "bus-lock-detection",
>>>> +                                   &x86ms->bld, OBJ_PROP_FLAG_READWRITE);
>>>> +    object_property_set_description(obj, "bus-lock-detection",
>>>> +            "Bus lock detection ratelimit");
>>>>    }
>>>>    static void x86_machine_class_init(ObjectClass *oc, void *data)
>>>> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
>>>> index 56080bd1fb..1f0ffbcfb9 100644
>>>> --- a/include/hw/i386/x86.h
>>>> +++ b/include/hw/i386/x86.h
>>>> @@ -72,6 +72,13 @@ struct X86MachineState {
>>>>         * will be translated to MSI messages in the address space.
>>>>         */
>>>>        AddressSpace *ioapic_as;
>>>> +
>>>> +    /*
>>>> +     * ratelimit enforced on detected bus locks, the default value
>>>> +     * is 0 per second
>>>> +     */
>>>> +    uint64_t bld;
>>>> +    RateLimit bld_limit;
>>>>    };
>>>>    #define X86_MACHINE_SMM              "smm"
>>>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>>>> index c8d61daf68..724862137d 100644
>>>> --- a/target/i386/kvm/kvm.c
>>>> +++ b/target/i386/kvm/kvm.c
>>>> @@ -130,6 +130,8 @@ static bool has_msr_mcg_ext_ctl;
>>>>    static struct kvm_cpuid2 *cpuid_cache;
>>>>    static struct kvm_msr_list *kvm_feature_msrs;
>>>> +#define SLICE_TIME 1000000000ULL /* ns */
>>>> +
>>>>    int kvm_has_pit_state2(void)
>>>>    {
>>>>        return has_pit_state2;
>>>> @@ -2267,6 +2269,27 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>>>            }
>>>>        }
>>>> +    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
>>>> +        X86MachineState *x86ms = X86_MACHINE(ms);
>>>> +
>>>> +        if (x86ms->bld > 0) {
>>>> +            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
>>>> +            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
>>>> +                error_report("kvm: bus lock detection unsupported");
>>>> +                return -ENOTSUP;
>>>> +            }
>>>> +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
>>>> +                                    KVM_BUS_LOCK_DETECTION_EXIT);
>>>> +            if (ret < 0) {
>>>> +                error_report("kvm: Failed to enable bus lock detection cap: %s",
>>>> +                             strerror(-ret));
>>>> +                return ret;
>>>> +            }
>>>> +
>>>> +            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, SLICE_TIME);
>>>> +        }
>>>> +    }
>>>> +
>>>>        return 0;
>>>>    }
>>>> @@ -4221,6 +4244,18 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
>>>>        }
>>>>    }
>>>> +static void kvm_rate_limit_on_bus_lock(void)
>>>> +{
>>>> +    MachineState *ms = MACHINE(qdev_get_machine());
>>>> +    X86MachineState *x86ms = X86_MACHINE(ms);
>>>> +
>>>> +    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 1);
>>>> +
>>>> +    if (delay_ns) {
>>>> +        g_usleep(delay_ns / SCALE_US);
>>>> +    }
>>>> +}
>>>
>>> Hi,
>>>
>>> Can't see a use-case where the throttling is very useful: this will
>>> slowdown the application to a halt (if its bus-lock instruction is
>>> being called frequently).
>>>
>>
>> The throttling is aimed to only slowdown the target application and avoid
>> slowdown the whole system (if bus-lock is frequent).
> 
> Right.
> 
>> As you known, bus locks
>> takes more cycles and disrupt performance on other cores.
> 
> Right, testcase shows its pretty bad (cyclictest latency goes from 5us to >
> 200us).
>>
>>> However its very nice to know that it (bus lock) has happened.
>>>
>>> So on KVM bus exit do you emulate the instruction or just execute it
>>> in the guest (without a VM-exit for the second time) ?
>>>
>> Bus lock VM exit is a trap-like VM exit and delivered following the
>> execution of the instruction acquiring the bus lock. Thus, it can just
>> detect the occurrence of bus locks and can't intercept it. In KVM, we don't
>> emulate the instruction. Bus lock already happens and guest will proceed to
>> execute.
> 
> I see.
> 
> Question: its possible to #AC trap the split-lock in a KVM guest, right?
> 
> https://lwn.net/Articles/810317/
> 

Right. A split lock can be detected by the #AC trap; the trap is
triggered *before* the instruction acquires the bus lock.

>
Chenyi Qiang April 1, 2021, 4:19 a.m. UTC | #8
Hi Paolo,

Do we have any comments on this bus lock VM exit QEMU patch?

On 3/17/2021 4:47 PM, Chenyi Qiang wrote:
> Virtual Machines can exploit bus locks to degrade the performance of
> system. To address this kind of performance DOS attack, bus lock VM exit
> is introduced in KVM and it will report the bus locks detected in guest,
> which can help userspace to enforce throttling policies.
> 
> The availability of bus lock VM exit can be detected through the
> KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential
> policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in
> bitmap is the only supported strategy at present. It indicates that KVM
> will exit to userspace to handle the bus locks.
> 
> This patch adds a ratelimit on the bus locks acquired in guest as a
> mitigation policy.
> 
> Introduce a new field "bld" to record the limited speed of bus locks in
> target VM. The user can specify it through the "bus-lock-detection"
> as a machine property. In current implementation, the default value of
> the speed is 0 per second, which means no restriction on the bus locks.
> 
> Ratelimit enforced in data transmission uses a time slice of 100ms to
> get smooth output during regular operations in block jobs. As for
> ratelimit on bus lock detection, simply set the ratelimit interval to 1s
> and restrict the quota of bus lock occurrence to the value of "bld". A
> potential alternative is to introduce the time slice as a property
> which can help the user achieve more precise control.
> 
> The detail of Bus lock VM exit can be found in spec:
> https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
>   hw/i386/x86.c         |  6 ++++++
>   include/hw/i386/x86.h |  7 +++++++
>   target/i386/kvm/kvm.c | 44 +++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 57 insertions(+)
> 
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 7865660e2c..a70a259e97 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -1209,6 +1209,12 @@ static void x86_machine_initfn(Object *obj)
>       x86ms->acpi = ON_OFF_AUTO_AUTO;
>       x86ms->smp_dies = 1;
>       x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
> +    x86ms->bld = 0;
> +
> +    object_property_add_uint64_ptr(obj, "bus-lock-detection",
> +                                   &x86ms->bld, OBJ_PROP_FLAG_READWRITE);
> +    object_property_set_description(obj, "bus-lock-detection",
> +            "Bus lock detection ratelimit");
>   }
>   
>   static void x86_machine_class_init(ObjectClass *oc, void *data)
> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
> index 56080bd1fb..1f0ffbcfb9 100644
> --- a/include/hw/i386/x86.h
> +++ b/include/hw/i386/x86.h
> @@ -72,6 +72,13 @@ struct X86MachineState {
>        * will be translated to MSI messages in the address space.
>        */
>       AddressSpace *ioapic_as;
> +
> +    /*
> +     * ratelimit enforced on detected bus locks, the default value
> +     * is 0 per second
> +     */
> +    uint64_t bld;
> +    RateLimit bld_limit;
>   };
>   
>   #define X86_MACHINE_SMM              "smm"
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index c8d61daf68..724862137d 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -130,6 +130,8 @@ static bool has_msr_mcg_ext_ctl;
>   static struct kvm_cpuid2 *cpuid_cache;
>   static struct kvm_msr_list *kvm_feature_msrs;
>   
> +#define SLICE_TIME 1000000000ULL /* ns */
> +
>   int kvm_has_pit_state2(void)
>   {
>       return has_pit_state2;
> @@ -2267,6 +2269,27 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>           }
>       }
>   
> +    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
> +        X86MachineState *x86ms = X86_MACHINE(ms);
> +
> +        if (x86ms->bld > 0) {
> +            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
> +            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
> +                error_report("kvm: bus lock detection unsupported");
> +                return -ENOTSUP;
> +            }
> +            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
> +                                    KVM_BUS_LOCK_DETECTION_EXIT);
> +            if (ret < 0) {
> +                error_report("kvm: Failed to enable bus lock detection cap: %s",
> +                             strerror(-ret));
> +                return ret;
> +            }
> +
> +            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, SLICE_TIME);
> +        }
> +    }
> +
>       return 0;
>   }
>   
> @@ -4221,6 +4244,18 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
>       }
>   }
>   
> +static void kvm_rate_limit_on_bus_lock(void)
> +{
> +    MachineState *ms = MACHINE(qdev_get_machine());
> +    X86MachineState *x86ms = X86_MACHINE(ms);
> +
> +    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 1);
> +
> +    if (delay_ns) {
> +        g_usleep(delay_ns / SCALE_US);
> +    }
> +}
> +
>   MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
>   {
>       X86CPU *x86_cpu = X86_CPU(cpu);
> @@ -4236,6 +4271,11 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
>       } else {
>           env->eflags &= ~IF_MASK;
>       }
> +    if (run->flags & KVM_RUN_X86_BUS_LOCK) {
> +        kvm_cpu_synchronize_state(cpu);
> +        warn_report("bus lock detected at rip: 0x%lx", env->eip);
> +        kvm_rate_limit_on_bus_lock();
> +    }
>   
>       /* We need to protect the apic state against concurrent accesses from
>        * different threads in case the userspace irqchip is used. */
> @@ -4594,6 +4634,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>           ioapic_eoi_broadcast(run->eoi.vector);
>           ret = 0;
>           break;
> +    case KVM_EXIT_X86_BUS_LOCK:
> +        /* already handled in kvm_arch_post_run */
> +        ret = 0;
> +        break;
>       default:
>           fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>           ret = -1;
>

Patch

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 7865660e2c..a70a259e97 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1209,6 +1209,12 @@  static void x86_machine_initfn(Object *obj)
     x86ms->acpi = ON_OFF_AUTO_AUTO;
     x86ms->smp_dies = 1;
     x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
+    x86ms->bld = 0;
+
+    object_property_add_uint64_ptr(obj, "bus-lock-detection",
+                                   &x86ms->bld, OBJ_PROP_FLAG_READWRITE);
+    object_property_set_description(obj, "bus-lock-detection",
+            "Bus lock detection ratelimit");
 }
 
 static void x86_machine_class_init(ObjectClass *oc, void *data)
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 56080bd1fb..1f0ffbcfb9 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -72,6 +72,13 @@  struct X86MachineState {
      * will be translated to MSI messages in the address space.
      */
     AddressSpace *ioapic_as;
+
+    /*
+     * ratelimit enforced on detected bus locks, the default value
+     * is 0 per second
+     */
+    uint64_t bld;
+    RateLimit bld_limit;
 };
 
 #define X86_MACHINE_SMM              "smm"
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index c8d61daf68..724862137d 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -130,6 +130,8 @@  static bool has_msr_mcg_ext_ctl;
 static struct kvm_cpuid2 *cpuid_cache;
 static struct kvm_msr_list *kvm_feature_msrs;
 
+#define SLICE_TIME 1000000000ULL /* ns */
+
 int kvm_has_pit_state2(void)
 {
     return has_pit_state2;
@@ -2267,6 +2269,27 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
         }
     }
 
+    if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
+        X86MachineState *x86ms = X86_MACHINE(ms);
+
+        if (x86ms->bld > 0) {
+            ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
+            if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
+                error_report("kvm: bus lock detection unsupported");
+                return -ENOTSUP;
+            }
+            ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
+                                    KVM_BUS_LOCK_DETECTION_EXIT);
+            if (ret < 0) {
+                error_report("kvm: Failed to enable bus lock detection cap: %s",
+                             strerror(-ret));
+                return ret;
+            }
+
+            ratelimit_set_speed(&x86ms->bld_limit, x86ms->bld, SLICE_TIME);
+        }
+    }
+
     return 0;
 }
 
@@ -4221,6 +4244,18 @@  void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
     }
 }
 
+static void kvm_rate_limit_on_bus_lock(void)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    X86MachineState *x86ms = X86_MACHINE(ms);
+
+    uint64_t delay_ns = ratelimit_calculate_delay(&x86ms->bld_limit, 1);
+
+    if (delay_ns) {
+        g_usleep(delay_ns / SCALE_US);
+    }
+}
+
 MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -4236,6 +4271,11 @@  MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
     } else {
         env->eflags &= ~IF_MASK;
     }
+    if (run->flags & KVM_RUN_X86_BUS_LOCK) {
+        kvm_cpu_synchronize_state(cpu);
+        warn_report("bus lock detected at rip: 0x%lx", env->eip);
+        kvm_rate_limit_on_bus_lock();
+    }
 
     /* We need to protect the apic state against concurrent accesses from
      * different threads in case the userspace irqchip is used. */
@@ -4594,6 +4634,10 @@  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ioapic_eoi_broadcast(run->eoi.vector);
         ret = 0;
         break;
+    case KVM_EXIT_X86_BUS_LOCK:
+        /* already handled in kvm_arch_post_run */
+        ret = 0;
+        break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;