
[v3,1/2] KVM: VMX: enable acknowledge interrupt on vmexit

Message ID 1361281183-22759-2-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived

Commit Message

Zhang, Yang Z Feb. 19, 2013, 1:39 p.m. UTC
From: Yang Zhang <yang.z.zhang@Intel.com>

The "acknowledge interrupt on exit" feature controls processor behavior
for external interrupt acknowledgement. When this control is set, the
processor acknowledges the interrupt controller to acquire the
interrupt vector on VM exit.

After enabling this feature, an interrupt that arrives while the target cpu is
running in vmx non-root mode will be handled by the vmx handler instead of the
handler in the idt. Currently, the vmx handler only fakes an interrupt stack
and jumps to the idt table to let the real handler handle it. In the future,
we will recognize the interrupt and deliver only interrupts that do not belong
to the current vcpu through the idt table; interrupts that belong to the
current vcpu will be handled inside the vmx handler. This will reduce KVM's
interrupt handling cost.

Also, the interrupt-enable logic changes when this feature is turned on:
before this patch, the hypervisor called local_irq_enable() to enable
interrupts directly. Now the IF bit is set on the interrupt stack frame, and
interrupts will be enabled on a return from the interrupt handler if an
external interrupt exists. If there is no external interrupt,
local_irq_enable() is still called.

Refer to Intel SDM volume 3, chapter 33.2.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/svm.c              |    6 +++
 arch/x86/kvm/vmx.c              |   69 ++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/x86.c              |    4 ++-
 4 files changed, 75 insertions(+), 5 deletions(-)
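
The heart of the patch is vmx_handle_external_intr(), reviewed at length
below: it looks the exit vector up in the host IDT and dispatches the host
handler through a faked interrupt stack frame. As a reading aid, here is a
hypothetical C helper (the name host_intr_entry is made up; gate_desc and
gate_offset() are the kernel's own) equivalent to the patch's lookup:

/*
 * Hypothetical helper, equivalent to the lookup in the patch below.
 * A gate descriptor is 16 bytes on x86-64 and 8 bytes on 32-bit, which
 * is what the explicit "vector * 16" / "vector * 8" arithmetic encodes;
 * pointer arithmetic on gate_desc scales by the same amount.
 */
static unsigned long host_intr_entry(struct vcpu_vmx *vmx, u32 exit_intr_info)
{
        unsigned int vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
        gate_desc *desc = (gate_desc *)vmx->host_idt_base + vector;

        return gate_offset(*desc);
}

The inline asm then builds the frame a hardware interrupt would have pushed
(SS and RSP on 64-bit, then RFLAGS with IF set, CS, and a return RIP) and
jumps to that entry, so the handler's iret re-enables interrupts exactly as
it would after a real interrupt.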

Comments

Avi Kivity Feb. 19, 2013, 5:35 p.m. UTC | #1
On Tue, Feb 19, 2013 at 3:39 PM, Yang Zhang <yang.z.zhang@intel.com> wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
>
> The "acknowledge interrupt on exit" feature controls processor behavior
> for external interrupt acknowledgement. When this control is set, the
> processor acknowledges the interrupt controller to acquire the
> interrupt vector on VM exit.
>
> After enabling this feature, an interrupt that arrives while the target cpu
> is running in vmx non-root mode will be handled by the vmx handler instead
> of the handler in the idt. Currently, the vmx handler only fakes an
> interrupt stack and jumps to the idt table to let the real handler handle
> it. In the future, we will recognize the interrupt and deliver only
> interrupts that do not belong to the current vcpu through the idt table;
> interrupts that belong to the current vcpu will be handled inside the vmx
> handler. This will reduce KVM's interrupt handling cost.
>
> Also, the interrupt-enable logic changes when this feature is turned on:
> before this patch, the hypervisor called local_irq_enable() to enable
> interrupts directly. Now the IF bit is set on the interrupt stack frame,
> and interrupts will be enabled on a return from the interrupt handler if an
> external interrupt exists. If there is no external interrupt,
> local_irq_enable() is still called.
>
> Refer to Intel SDM volume 3, chapter 33.2.
>

> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
> +{
> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> +
> +       /*
> +        * If external interrupt exists, IF bit is set in rflags/eflags on the
> +        * interrupt stack frame, and interrupt will be enabled on a return
> +        * from interrupt handler.
> +        */
> +       if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
> +                       == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
> +               unsigned int vector;
> +               unsigned long entry;
> +               gate_desc *desc;
> +               struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +               vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
> +#ifdef CONFIG_X86_64
> +               desc = (void *)vmx->host_idt_base + vector * 16;
> +#else
> +               desc = (void *)vmx->host_idt_base + vector * 8;
> +#endif
> +
> +               entry = gate_offset(*desc);
> +               asm(
> +                       "mov %0, %%" _ASM_DX " \n\t"
> +#ifdef CONFIG_X86_64
> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
> +                       "mov %%ss, %%" _ASM_AX " \n\t"
> +                       "push %%" _ASM_AX " \n\t"
> +                       "push %%" _ASM_BX " \n\t"
> +#endif

Are we sure no interrupts are using the IST feature?  I guess it's unlikely.

> +                       "pushf \n\t"
> +                       "pop %%" _ASM_AX " \n\t"
> +                       "or $0x200, %%" _ASM_AX " \n\t"
> +                       "push %%" _ASM_AX " \n\t"

Can simplify to pushf; orl $0x200, (%%rsp).

> +                       "mov %%cs, %%" _ASM_AX " \n\t"
> +                       "push %%" _ASM_AX " \n\t"

push %%cs

> +                       "push intr_return \n\t"

push $1f.  Or even combine with the next instruction, and call %rdx.

> +                       "jmp *%%" _ASM_DX " \n\t"
> +                       "1: \n\t"
> +                       ".pushsection .rodata \n\t"
> +                       ".global intr_return \n\t"
> +                       "intr_return: " _ASM_PTR " 1b \n\t"
> +                       ".popsection \n\t"
> +                       : : "m"(entry) :
> +#ifdef CONFIG_X86_64
> +                       "rax", "rbx", "rdx"
> +#else
> +                       "eax", "edx"
> +#endif
> +                       );
> +       } else
> +               local_irq_enable();
> +}
> +
Zhang, Yang Z Feb. 20, 2013, 2:46 a.m. UTC | #2
Avi Kivity wrote on 2013-02-20:
> On Tue, Feb 19, 2013 at 3:39 PM, Yang Zhang <yang.z.zhang@intel.com> wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>> 
>> The "acknowledge interrupt on exit" feature controls processor behavior
>> for external interrupt acknowledgement. When this control is set, the
>> processor acknowledges the interrupt controller to acquire the
>> interrupt vector on VM exit.
>> 
>> After enabling this feature, an interrupt that arrives while the target
>> cpu is running in vmx non-root mode will be handled by the vmx handler
>> instead of the handler in the idt. Currently, the vmx handler only fakes
>> an interrupt stack and jumps to the idt table to let the real handler
>> handle it. In the future, we will recognize the interrupt and deliver only
>> interrupts that do not belong to the current vcpu through the idt table;
>> interrupts that belong to the current vcpu will be handled inside the vmx
>> handler. This will reduce KVM's interrupt handling cost.
>>
>> Also, the interrupt-enable logic changes when this feature is turned on:
>> before this patch, the hypervisor called local_irq_enable() to enable
>> interrupts directly. Now the IF bit is set on the interrupt stack frame,
>> and interrupts will be enabled on a return from the interrupt handler if
>> an external interrupt exists. If there is no external interrupt,
>> local_irq_enable() is still called.
>>
>> Refer to Intel SDM volume 3, chapter 33.2.
>> 
>> 
>> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>> +{
>> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
>> [...]
>> +               entry = gate_offset(*desc);
>> +               asm(
>> +                       "mov %0, %%" _ASM_DX " \n\t"
>> +#ifdef CONFIG_X86_64
>> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>> +                       "mov %%ss, %%" _ASM_AX " \n\t"
>> +                       "push %%" _ASM_AX " \n\t"
>> +                       "push %%" _ASM_BX " \n\t"
>> +#endif
> 
> Are we sure no interrupts are using the IST feature?  I guess it's unlikely.
Linux uses IST for NMI, stack fault, machine-check, double fault and debug
interrupts. No external interrupt will use it. This feature is only for
external interrupts, so we don't need to check it here.
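
For reference, the IST selector lives in a 3-bit field of the 64-bit gate
descriptor; a sketch of the layout as the kernel defines it (cf.
gate_struct64 in arch/x86/include/asm/desc_defs.h):

/*
 * x86-64 IDT gate layout (sketch).  The 3-bit ist field selects one of
 * the Interrupt Stack Table stacks; gates for external interrupts leave
 * it zero, which is why faking the frame on the current stack is safe.
 */
struct gate_struct64 {
        u16 offset_low;
        u16 segment;
        unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
        u16 offset_middle;
        u32 offset_high;
        u32 zero1;
} __attribute__((packed));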

> 
>> +                       "pushf \n\t"
>> +                       "pop %%" _ASM_AX " \n\t"
>> +                       "or $0x200, %%" _ASM_AX " \n\t"
>> +                       "push %%" _ASM_AX " \n\t"
> 
> Can simplify to pushf; orl $0x200, (%%rsp).
Sure.

>> +                       "mov %%cs, %%" _ASM_AX " \n\t"
>> +                       "push %%" _ASM_AX " \n\t"
> 
> push %%cs
"push %%cs" is invalid in x86_64.

>> +                       "push intr_return \n\t"
> 
> push $1f.  Or even combine with the next instruction, and call %rdx.
Which is faster? jmp or call?

>> +                       "jmp *%%" _ASM_DX " \n\t"
>> +                       "1: \n\t"
>> +                       ".pushsection .rodata \n\t"
>> +                       ".global intr_return \n\t"
>> +                       "intr_return: " _ASM_PTR " 1b \n\t"
>> +                       ".popsection \n\t"
>> +                       : : "m"(entry) :
>> +#ifdef CONFIG_X86_64
>> +                       "rax", "rbx", "rdx"
>> +#else
>> +                       "eax", "edx"
>> +#endif
>> +                       );
>> +       } else
>> +               local_irq_enable();
>> +}
>> +


Best regards,
Yang


Gleb Natapov Feb. 20, 2013, 10:10 a.m. UTC | #3
On Wed, Feb 20, 2013 at 02:46:05AM +0000, Zhang, Yang Z wrote:
> Avi Kivity wrote on 2013-02-20:
> > On Tue, Feb 19, 2013 at 3:39 PM, Yang Zhang <yang.z.zhang@intel.com> wrote:
> >> From: Yang Zhang <yang.z.zhang@Intel.com>
> >> 
> >> The "acknowledge interrupt on exit" feature controls processor behavior
> >> for external interrupt acknowledgement. When this control is set, the
> >> processor acknowledges the interrupt controller to acquire the
> >> interrupt vector on VM exit.
> >> 
> >> After enabling this feature, an interrupt that arrives while the target
> >> cpu is running in vmx non-root mode will be handled by the vmx handler
> >> instead of the handler in the idt. Currently, the vmx handler only fakes
> >> an interrupt stack and jumps to the idt table to let the real handler
> >> handle it. In the future, we will recognize the interrupt and deliver
> >> only interrupts that do not belong to the current vcpu through the idt
> >> table; interrupts that belong to the current vcpu will be handled inside
> >> the vmx handler. This will reduce KVM's interrupt handling cost.
> >>
> >> Also, the interrupt-enable logic changes when this feature is turned on:
> >> before this patch, the hypervisor called local_irq_enable() to enable
> >> interrupts directly. Now the IF bit is set on the interrupt stack frame,
> >> and interrupts will be enabled on a return from the interrupt handler if
> >> an external interrupt exists. If there is no external interrupt,
> >> local_irq_enable() is still called.
> >>
> >> Refer to Intel SDM volume 3, chapter 33.2.
> >> 
> >> 
> >> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
> >> +{
> >> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> >> [...]
> >> +               entry = gate_offset(*desc);
> >> +               asm(
> >> +                       "mov %0, %%" _ASM_DX " \n\t"
> >> +#ifdef CONFIG_X86_64
> >> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
> >> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
> >> +                       "mov %%ss, %%" _ASM_AX " \n\t"
> >> +                       "push %%" _ASM_AX " \n\t"
> >> +                       "push %%" _ASM_BX " \n\t"
> >> +#endif
> > 
> > Are we sure no interrupts are using the IST feature?  I guess it's unlikely.
> Linux uses IST for NMI, stack fault, machine-check, double fault and debug
> interrupts. No external interrupt will use it. This feature is only for
> external interrupts, so we don't need to check it here.
> 
> > 
> >> +                       "pushf \n\t"
> >> +                       "pop %%" _ASM_AX " \n\t"
> >> +                       "or $0x200, %%" _ASM_AX " \n\t"
> >> +                       "push %%" _ASM_AX " \n\t"
> > 
> > Can simplify to pushf; orl $0x200, (%%rsp).
> Sure.
> 
> >> +                       "mov %%cs, %%" _ASM_AX " \n\t"
> >> +                       "push %%" _ASM_AX " \n\t"
> > 
> > push %%cs
> "push %%cs" is invalid in x86_64.
> 
> >> +                       "push intr_return \n\t"
> > 
> > push $1f.  Or even combine with the next instruction, and call %rdx.
> Which is faster? jmp or call?
> 
Wrong question. You need to compare push+jmp with call. I do not see why
the latter would be slower.

> >> +                       "jmp *%%" _ASM_DX " \n\t"
> >> +                       "1: \n\t"
> >> +                       ".pushsection .rodata \n\t"
> >> +                       ".global intr_return \n\t"
> >> +                       "intr_return: " _ASM_PTR " 1b \n\t"
> >> +                       ".popsection \n\t"
> >> +                       : : "m"(entry) :
> >> +#ifdef CONFIG_X86_64
> >> +                       "rax", "rbx", "rdx"
> >> +#else
> >> +                       "eax", "edx"
> >> +#endif
> >> +                       );
> >> +       } else
> >> +               local_irq_enable();
> >> +}
> >> +
> 
> 
> Best regards,
> Yang
> 

--
			Gleb.
Zhang, Yang Z Feb. 20, 2013, 11:07 a.m. UTC | #4
Gleb Natapov wrote on 2013-02-20:
> On Wed, Feb 20, 2013 at 02:46:05AM +0000, Zhang, Yang Z wrote:
>> Avi Kivity wrote on 2013-02-20:
>>> On Tue, Feb 19, 2013 at 3:39 PM, Yang Zhang <yang.z.zhang@intel.com>
> wrote:
>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>> 
>>>> The "acknowledge interrupt on exit" feature controls processor behavior
>>>> for external interrupt acknowledgement. When this control is set, the
>>>> processor acknowledges the interrupt controller to acquire the
>>>> interrupt vector on VM exit.
>>>> 
>>>> After enabling this feature, an interrupt that arrives while the target
>>>> cpu is running in vmx non-root mode will be handled by the vmx handler
>>>> instead of the handler in the idt. Currently, the vmx handler only fakes
>>>> an interrupt stack and jumps to the idt table to let the real handler
>>>> handle it. In the future, we will recognize the interrupt and deliver
>>>> only interrupts that do not belong to the current vcpu through the idt
>>>> table; interrupts that belong to the current vcpu will be handled inside
>>>> the vmx handler. This will reduce KVM's interrupt handling cost.
>>>>
>>>> Also, the interrupt-enable logic changes when this feature is turned on:
>>>> before this patch, the hypervisor called local_irq_enable() to enable
>>>> interrupts directly. Now the IF bit is set on the interrupt stack frame,
>>>> and interrupts will be enabled on a return from the interrupt handler if
>>>> an external interrupt exists. If there is no external interrupt,
>>>> local_irq_enable() is still called.
>>>>
>>>> Refer to Intel SDM volume 3, chapter 33.2.
>>>> 
>>>> 
>>>> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
>>>> [...]
>>>> +               entry = gate_offset(*desc);
>>>> +               asm(
>>>> +                       "mov %0, %%" _ASM_DX " \n\t"
>>>> +#ifdef CONFIG_X86_64
>>>> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>>>> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>>>> +                       "mov %%ss, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_BX " \n\t"
>>>> +#endif
>>> 
>>> Are we sure no interrupts are using the IST feature?  I guess it's unlikely.
>> Linux uses IST for NMI, stack fault, machine-check, double fault and
>> debug interrupts. No external interrupt will use it. This feature is
>> only for external interrupts, so we don't need to check it here.
>> 
>>> 
>>>> +                       "pushf \n\t"
>>>> +                       "pop %%" _ASM_AX " \n\t"
>>>> +                       "or $0x200, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>> 
>>> Can simplify to pushf; orl $0x200, (%%rsp).
>> Sure.
>> 
>>>> +                       "mov %%cs, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>> 
>>> push %%cs
>> "push %%cs" is invalid in x86_64.
>> 
>>>> +                       "push intr_return \n\t"
>>> 
>>> push $1f.  Or even combine with the next instruction, and call %rdx.
>> Which is faster? jmp or call?
>> 
> Wrong question. You need to compare push+jmp with call. I do not see why
Sorry, I didn't express it clearly.  Yes, I want to compare push+jmp with call.

> the latter would be slower.
I think so too. If push+jmp is not faster than call, I will use the latter.

>>>> +                       "jmp *%%" _ASM_DX " \n\t"
>>>> +                       "1: \n\t"
>>>> +                       ".pushsection .rodata \n\t"
>>>> +                       ".global intr_return \n\t"
>>>> +                       "intr_return: " _ASM_PTR " 1b \n\t"
>>>> +                       ".popsection \n\t"
>>>> +                       : : "m"(entry) :
>>>> +#ifdef CONFIG_X86_64
>>>> +                       "rax", "rbx", "rdx"
>>>> +#else
>>>> +                       "eax", "edx"
>>>> +#endif
>>>> +                       );
>>>> +       } else
>>>> +               local_irq_enable();
>>>> +}
>>>> +
>> 
>> 
>> Best regards,
>> Yang
>> 
> 
> --
> 			Gleb.


Best regards,
Yang

Avi Kivity Feb. 20, 2013, 12:35 p.m. UTC | #5
On Wed, Feb 20, 2013 at 4:46 AM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>>
>>> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>> +{
>>> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
>>> [...]
>>> +               entry = gate_offset(*desc);
>>> +               asm(
>>> +                       "mov %0, %%" _ASM_DX " \n\t"
>>> +#ifdef CONFIG_X86_64
>>> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>>> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>>> +                       "mov %%ss, %%" _ASM_AX " \n\t"
>>> +                       "push %%" _ASM_AX " \n\t"
>>> +                       "push %%" _ASM_BX " \n\t"
>>> +#endif
>>
>> Are we sure no interrupts are using the IST feature?  I guess it's unlikely.
> Linux uses IST for NMI, stack fault, machine-check, double fault and debug
> interrupts. No external interrupt will use it. This feature is only for
> external interrupts, so we don't need to check it here.

Ok, thanks for checking.

>
>>
>>> +                       "pushf \n\t"
>>> +                       "pop %%" _ASM_AX " \n\t"
>>> +                       "or $0x200, %%" _ASM_AX " \n\t"
>>> +                       "push %%" _ASM_AX " \n\t"
>>
>> Can simplify to pushf; orl $0x200, (%%rsp).
> Sure.
>
>>> +                       "mov %%cs, %%" _ASM_AX " \n\t"
>>> +                       "push %%" _ASM_AX " \n\t"
>>
>> push %%cs
> "push %%cs" is invalid in x86_64.

Oops. 'push[lq] $__KERNEL_CS' then.

>
>>> +                       "push intr_return \n\t"
>>
>> push $1f.  Or even combine with the next instruction, and call %rdx.
> Which is faster? jmp or call?

Actually it doesn't matter, the processor is clever enough to minimize
the difference.  But the code is simpler and shorter with 'call'.
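
For completeness, a minimal sketch of the two forms side by side (x86-64 and
gcc assumed; handler() is a hypothetical stand-in for the IDT entry):

extern void handler(void);      /* hypothetical stand-in for the IDT entry */

static void dispatch_sketch(void)
{
        unsigned long entry = (unsigned long)handler;

        /* push+jmp: build the return address by hand, then jump */
        asm volatile("push $1f \n\t"
                     "jmp *%0 \n\t"
                     "1: \n\t"
                     : : "r"(entry) : "memory");

        /* call: the same stack effect in one instruction */
        asm volatile("call *%0 \n\t"
                     : : "r"(entry) : "memory");
}

Either way the handler finds a return address on top of the stack; the only
difference is whether the push is explicit or folded into the transfer.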
Zhang, Yang Z Feb. 20, 2013, 1:10 p.m. UTC | #6
Avi Kivity wrote on 2013-02-20:
> On Wed, Feb 20, 2013 at 4:46 AM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>>>> 
>>>> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
>>>> [...]
>>>> +               entry = gate_offset(*desc);
>>>> +               asm(
>>>> +                       "mov %0, %%" _ASM_DX " \n\t"
>>>> +#ifdef CONFIG_X86_64
>>>> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>>>> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>>>> +                       "mov %%ss, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_BX " \n\t"
>>>> +#endif
>>> 
>>> Are we sure no interrupts are using the IST feature?  I guess it's unlikely.
>> Linux uses IST for NMI, stack fault, machine-check, double fault and
>> debug interrupts. No external interrupt will use it. This feature is
>> only for external interrupts, so we don't need to check it here.
> 
> Ok, thanks for checking.
> 
>> 
>>> 
>>>> +                       "pushf \n\t"
>>>> +                       "pop %%" _ASM_AX " \n\t"
>>>> +                       "or $0x200, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>> 
>>> Can simplify to pushf; orl $0x200, (%%rsp).
>> Sure.
>> 
>>>> +                       "mov %%cs, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>> 
>>> push %%cs
>> "push %%cs" is invalid in x86_64.
> 
> Oops. 'push[lq] $__KERNEL_CS' then.
Is this right? I just copied it from another file.

#define __STR(X) #X
#define STR(X) __STR(X)

#ifdef CONFIG_X86_64
                        "pushq $"STR(__KERNEL_CS)" \n\t"
#else
                        "pushl $"STR(__KERNEL_CS)" \n\t"
#endif

#undef STR
#undef __STR
 
>> 
>>>> +                       "push intr_return \n\t"
>>> 
>>> push $1f.  Or even combine with the next instruction, and call %rdx.
>> Which is faster? jmp or call?
> 
> Actually it doesn't matter, the processor is clever enough to minimize
> the difference.  But the code is simpler and shorter with 'call'.
Yes, 'call' is better.

Best regards,
Yang


Avi Kivity Feb. 20, 2013, 3:10 p.m. UTC | #7
On Wed, Feb 20, 2013 at 3:10 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>>>
>>>> push %%cs
>>> "push %%cs" is invalid in x86_64.
>>
>> Oops. 'push[lq] $__KERNEL_CS' then.
> Is this right? I just copied it from another file.
>
> #define __STR(X) #X
> #define STR(X) __STR(X)
>
> #ifdef CONFIG_X86_64
>                         "pushq $"STR(__KERNEL_CS)" \n\t"
> #else
>                         "pushl $"STR(__KERNEL_CS)" \n\t"
> #endif
>
> #undef STR
> #undef __STR
>

Use __ASM_SIZE and an immediate operand for __KERNEL_CS:

 asm ( ... : : [cs]"i"(__KERNEL_CS) );

and the code will be cleaner.
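
Concretely, a minimal sketch of what that buys (hypothetical demo function;
with an "i" constraint the %c modifier prints the bare constant, so it can be
written as an immediate):

static inline unsigned long push_pop_cs_demo(void)
{
        unsigned long tmp;

        /*
         * "$%c[cs]" assembles to an immediate such as "$16", and
         * __ASM_SIZE() picks pushl/pushq to match the word size;
         * the pop keeps the stack balanced.
         */
        asm volatile(__ASM_SIZE(push) " $%c[cs] \n\t"
                     __ASM_SIZE(pop) " %[tmp] \n\t"
                     : [tmp]"=r"(tmp)
                     : [cs]"i"(__KERNEL_CS));
        return tmp;
}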
Zhang, Yang Z Feb. 21, 2013, 8:06 a.m. UTC | #8
Eric Northup wrote on 2013-02-21:
> On Tue, Feb 19, 2013 at 6:46 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> 
>> Avi Kivity wrote on 2013-02-20:
>>> On Tue, Feb 19, 2013 at 3:39 PM, Yang Zhang <yang.z.zhang@intel.com>
>>> wrote:
>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>> 
>>>> The "acknowledge interrupt on exit" feature controls processor behavior
>>>> for external interrupt acknowledgement. When this control is set, the
>>>> processor acknowledges the interrupt controller to acquire the
>>>> interrupt vector on VM exit.
>>>> 
>>>> After enabling this feature, an interrupt that arrives while the target
>>>> cpu is running in vmx non-root mode will be handled by the vmx handler
>>>> instead of the handler in the idt. Currently, the vmx handler only fakes
>>>> an interrupt stack and jumps to the idt table to let the real handler
>>>> handle it. In the future, we will recognize the interrupt and deliver
>>>> only interrupts that do not belong to the current vcpu through the idt
>>>> table; interrupts that belong to the current vcpu will be handled inside
>>>> the vmx handler. This will reduce KVM's interrupt handling cost.
>>>>
>>>> Also, the interrupt-enable logic changes when this feature is turned on:
>>>> before this patch, the hypervisor called local_irq_enable() to enable
>>>> interrupts directly. Now the IF bit is set on the interrupt stack frame,
>>>> and interrupts will be enabled on a return from the interrupt handler if
>>>> an external interrupt exists. If there is no external interrupt,
>>>> local_irq_enable() is still called.
>>>>
>>>> Refer to Intel SDM volume 3, chapter 33.2.
>>>> 
>>>> 
>>>> +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +       u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
>>>> [...]
>>>> +               entry = gate_offset(*desc);
>>>> +               asm(
>>>> +                       "mov %0, %%" _ASM_DX " \n\t"
>>>> +#ifdef CONFIG_X86_64
>>>> +                       "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>>>> +                       "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>>>> +                       "mov %%ss, %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_AX " \n\t"
>>>> +                       "push %%" _ASM_BX " \n\t"
>>>> +#endif
>>> 
>>> Are we sure no interrupts are using the IST feature?  I guess it's
>>> unlikely.
>> Linux uses IST for NMI, stack fault, machine-check, double fault and debug
>> interrupts. No external interrupt will use it. This feature is only for
>> external interrupts, so we don't need to check it here.
> 
> Would it be silly paranoia to BUG_ON(desc->ist)?
I think the BUG_ON is reasonable. I will add it in the next version.
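
One possible placement for the check, assuming desc is the gate_desc pointer
from the patch (on x86-64 gate_desc carries the ist field; a sketch, the next
version may of course differ):

#ifdef CONFIG_X86_64
                desc = (void *)vmx->host_idt_base + vector * 16;
                /*
                 * The faked frame is built on the current stack; external
                 * interrupt gates never select an IST stack, so trap a
                 * descriptor that unexpectedly does.
                 */
                BUG_ON(desc->ist);
#else
                desc = (void *)vmx->host_idt_base + vector * 8;
#endif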

Best regards,
Yang


Zhang, Yang Z Feb. 21, 2013, 8:58 a.m. UTC | #9
Avi Kivity wrote on 2013-02-20:
> On Wed, Feb 20, 2013 at 3:10 PM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>>>>> 
>>>>> push %%cs
>>>> "push %%cs" is invalid in x86_64.
>>> 
>>> Oops. 'push[lq] $__KERNEL_CS' then.
>> Is this right? I just copied it from another file.
>> 
>> #define __STR(X) #X
>> #define STR(X) __STR(X)
>> 
>> #ifdef CONFIG_X86_64
>>                         "pushq $"STR(__KERNEL_CS)" \n\t" #else "pushl
>>                         $"STR(__KERNEL_CS)" \n\t"
>> #endif
>> 
>> #undef STR
>> #undef __STR
>> 
> 
> Use __ASM_SIZE and an immediate operand for __KERNEL_CS:
> 
>  asm ( ... : : [cs]"i"(__KERNEL_CS) );
> and the code will be cleaner.
Thanks. Here is the code after the change; please review it:

                asm(
                        "mov %0, %%" _ASM_DX " \n\t"
#ifdef CONFIG_X86_64
                        "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
                        "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
                        "mov %%ss, %%" _ASM_AX " \n\t"
                        "push %%" _ASM_AX " \n\t"
                        "push %%" _ASM_BX " \n\t"
#endif
                        "pushf \n\t"
                        "orl $0x200, (%%" _ASM_SP ") \n\t"
                        __ASM_SIZE(push) " $%c[cs] \n\t"
                        "call *%%" _ASM_DX " \n\t"
                        : : "m"(entry), [cs]"i"(__KERNEL_CS) :
#ifdef CONFIG_X86_64
                        "rax", "rbx", "rdx"
#else
                        "edx"
#endif
                        );


Best regards,
Yang


Avi Kivity Feb. 21, 2013, 9:22 a.m. UTC | #10
On Thu, Feb 21, 2013 at 10:58 AM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
> Thanks. Here is the code after the change; please review it:
>
>                 asm(
>                         "mov %0, %%" _ASM_DX " \n\t"
> #ifdef CONFIG_X86_64
>                         "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>                         "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>                         "mov %%ss, %%" _ASM_AX " \n\t"
>                         "push %%" _ASM_AX " \n\t"
>                         "push %%" _ASM_BX " \n\t"
> #endif
>                         "pushf \n\t"
>                         "orl $0x200, (%%" _ASM_SP ") \n\t"
>                         __ASM_SIZE(push) " $%c[cs] \n\t"
>                         "call *%%" _ASM_DX " \n\t"
>                         : : "m"(entry), [cs]"i"(__KERNEL_CS) :
> #ifdef CONFIG_X86_64
>                         "rax", "rbx", "rdx"
> #else
>                         "edx"
> #endif
>                         );

For cleanliness, you can provide more parameters via the constraints:

  "m"(entry) -> [entry]"r"(entry) (and then use "call *[entry]")
  use "push %c[ss]" for ss
  and [sp]"=&r"(tmp) instead of rdx/edx as the temporary for rsp
Zhang, Yang Z Feb. 22, 2013, 2:50 a.m. UTC | #11
Avi Kivity wrote on 2013-02-21:
> On Thu, Feb 21, 2013 at 10:58 AM, Zhang, Yang Z <yang.z.zhang@intel.com>
> wrote:
>> Thanks. Here is the code after the change; please review it:
>> 
>>                 asm(
>>                         "mov %0, %%" _ASM_DX " \n\t"
>> #ifdef CONFIG_X86_64
>>                         "mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
>>                         "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
>>                         "mov %%ss, %%" _ASM_AX " \n\t"
>>                         "push %%" _ASM_AX " \n\t"
>>                         "push %%" _ASM_BX " \n\t"
>> #endif
>>                         "pushf \n\t"
>>                         "orl $0x200, (%%" _ASM_SP ") \n\t"
>>                         __ASM_SIZE(push) " $%c[cs] \n\t"
>>                         "call *%%" _ASM_DX " \n\t"
>>                         : : "m"(entry), [cs]"i"(__KERNEL_CS) :
>> #ifdef CONFIG_X86_64
>>                         "rax", "rbx", "rdx"
>> #else
>>                         "edx"
>> #endif
>>                         );
> 
> For cleanliness, you can provide more parameters via the constraints:
> 
>   "m"(entry) -> [entry]"r"(entry) (and then use "call *[entry]")
>   use "push %c[ss]" for ss
>   and [sp]"=&r"(tmp) instead of rdx/edx as the temporary for rsp
I didn't find a __KERNEL_SS definition, but I noticed that KVM uses __KERNEL_DS as the HOST_SS_SELECTOR.
Please review the following changes.

                asm volatile(
#ifdef CONFIG_X86_64
                        "mov %%" _ASM_SP ", %[sp] \n\t"
                        "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
                        "push $%c[ss] \n\t"
                        "push %[sp] \n\t"
#endif
                        "pushf \n\t"
                        "orl $0x200, (%%" _ASM_SP ") \n\t"
                        __ASM_SIZE(push) " $%c[cs] \n\t"
                        "call *%[entry] \n\t"
                        :
#ifdef CONFIG_X86_64
                        [sp]"=&r"(tmp)
#endif
                        : [entry]"r"(entry),
                        [ss]"i"(__KERNEL_DS),
                        [cs]"i"(__KERNEL_CS)
                        );
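
Pulling the thread together, the handler with all of the review feedback
folded in (IST sanity check, pushf plus orl on the stack slot, immediate
segment selectors, call instead of push+jmp, and named constraints) could
look like the sketch below; it is assembled from the discussion above and is
not necessarily the exact version that was merged:

static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
        u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

        /*
         * If the exit was caused by an external interrupt, IF is set in
         * the flags image of the faked frame, so the handler's iret
         * re-enables interrupts for us.
         */
        if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
                        == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
                struct vcpu_vmx *vmx = to_vmx(vcpu);
                unsigned int vector;
                unsigned long entry;
                gate_desc *desc;
#ifdef CONFIG_X86_64
                unsigned long tmp;
#endif

                vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
                desc = (gate_desc *)vmx->host_idt_base + vector;
#ifdef CONFIG_X86_64
                BUG_ON(desc->ist);      /* external gates never use IST */
#endif
                entry = gate_offset(*desc);
                asm volatile(
#ifdef CONFIG_X86_64
                        "mov %%" _ASM_SP ", %[sp] \n\t"
                        "and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
                        "push $%c[ss] \n\t"
                        "push %[sp] \n\t"
#endif
                        "pushf \n\t"
                        "orl $0x200, (%%" _ASM_SP ") \n\t"
                        __ASM_SIZE(push) " $%c[cs] \n\t"
                        "call *%[entry] \n\t"
                        :
#ifdef CONFIG_X86_64
                        [sp]"=&r"(tmp)
#endif
                        : [entry]"r"(entry),
                          [ss]"i"(__KERNEL_DS),
                          [cs]"i"(__KERNEL_CS)
                        );
        } else
                local_irq_enable();
}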


Best regards,
Yang



Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 635a74d..b8388e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -730,6 +730,7 @@  struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
+	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e1b1ce2..a7d60d7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4247,6 +4247,11 @@  out:
 	return ret;
 }
 
+static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+	local_irq_enable();
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -4342,6 +4347,7 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.set_tdp_cr3 = set_tdp_cr3,
 
 	.check_intercept = svm_check_intercept,
+	.handle_external_intr = svm_handle_external_intr,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6667042..436b134 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -377,6 +377,7 @@  struct vcpu_vmx {
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
+	unsigned long	      host_idt_base;
 #ifdef CONFIG_X86_64
 	u64 		      msr_host_kernel_gs_base;
 	u64 		      msr_guest_kernel_gs_base;
@@ -2605,7 +2606,8 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+		VM_EXIT_ACK_INTR_ON_EXIT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
@@ -3875,7 +3877,7 @@  static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
  */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
 	u32 low32, high32;
 	unsigned long tmpl;
@@ -3903,6 +3905,7 @@  static void vmx_set_constant_host_state(void)
 
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+	vmx->host_idt_base = dt.address;
 
 	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
@@ -4035,7 +4038,7 @@  static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
 	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-	vmx_set_constant_host_state();
+	vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
 	rdmsrl(MSR_FS_BASE, a);
 	vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -6346,6 +6349,63 @@  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 	}
 }
 
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	/*
+	 * If external interrupt exists, IF bit is set in rflags/eflags on the
+	 * interrupt stack frame, and interrupt will be enabled on a return
+	 * from interrupt handler.
+	 */
+	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
+		unsigned int vector;
+		unsigned long entry;
+		gate_desc *desc;
+		struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+		vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
+#ifdef CONFIG_X86_64
+		desc = (void *)vmx->host_idt_base + vector * 16;
+#else
+		desc = (void *)vmx->host_idt_base + vector * 8;
+#endif
+
+		entry = gate_offset(*desc);
+		asm(
+			"mov %0, %%" _ASM_DX " \n\t"
+#ifdef CONFIG_X86_64
+			"mov %%" _ASM_SP ", %%" _ASM_BX " \n\t"
+			"and $0xfffffffffffffff0, %%" _ASM_SP " \n\t"
+			"mov %%ss, %%" _ASM_AX " \n\t"
+			"push %%" _ASM_AX " \n\t"
+			"push %%" _ASM_BX " \n\t"
+#endif
+			"pushf \n\t"
+			"pop %%" _ASM_AX " \n\t"
+			"or $0x200, %%" _ASM_AX " \n\t"
+			"push %%" _ASM_AX " \n\t"
+			"mov %%cs, %%" _ASM_AX " \n\t"
+			"push %%" _ASM_AX " \n\t"
+			"push intr_return \n\t"
+			"jmp *%%" _ASM_DX " \n\t"
+			"1: \n\t"
+			".pushsection .rodata \n\t"
+			".global intr_return \n\t"
+			"intr_return: " _ASM_PTR " 1b \n\t"
+			".popsection \n\t"
+			: : "m"(entry) :
+#ifdef CONFIG_X86_64
+			"rax", "rbx", "rdx"
+#else
+			"eax", "edx"
+#endif
+			);
+	} else
+		local_irq_enable();
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -7016,7 +7076,7 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * Other fields are different per CPU, and will be set later when
 	 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
 	 */
-	vmx_set_constant_host_state();
+	vmx_set_constant_host_state(vmx);
 
 	/*
 	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7618,6 +7678,7 @@  static struct kvm_x86_ops vmx_x86_ops = {
 	.set_tdp_cr3 = vmx_set_cr3,
 
 	.check_intercept = vmx_check_intercept,
+	.handle_external_intr = vmx_handle_external_intr,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3c5bb6f..f1fa37e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5784,7 +5784,9 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
-	local_irq_enable();
+
+	/* Interrupts are enabled by handle_external_intr() */
+	kvm_x86_ops->handle_external_intr(vcpu);
 
 	++vcpu->stat.exits;