diff mbox

[v1,1/4] KVM: MMU: check guest CR3 reserved bits based on its physical address width.

Message ID 1502544906-1108-2-git-send-email-yu.c.zhang@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yu Zhang Aug. 12, 2017, 1:35 p.m. UTC
Currently, KVM uses CR3_L_MODE_RESERVED_BITS to check the
reserved bits in CR3. Yet the length of reserved bits in
guest CR3 should be based on the physical address width
exposed to the VM. This patch changes CR3 check logic to
calculate the reserved bits at runtime.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 -
 arch/x86/kvm/emulate.c          | 12 ++++++++++--
 arch/x86/kvm/x86.c              |  8 ++++----
 3 files changed, 14 insertions(+), 7 deletions(-)

Comments

Paolo Bonzini Aug. 14, 2017, 7:36 a.m. UTC | #1
On 12/08/2017 15:35, Yu Zhang wrote:
> +			ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, NULL);
> +			maxphyaddr = eax * 0xff;

This is "&", not "*".  You can also use rsvd_bits here.

> +			rsvd = (~((1UL << maxphyaddr) - 1)) &
> +				~CR3_PCID_INVD;
> +		}
>  
>  		if (new_val & rsvd)
>  			return emulate_gp(ctxt, 0);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e40a779..d9100c4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -813,10 +813,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>  		return 0;
>  	}
>  
> -	if (is_long_mode(vcpu)) {
> -		if (cr3 & CR3_L_MODE_RESERVED_BITS)
> -			return 1;
> -	} else if (is_pae(vcpu) && is_paging(vcpu) &&
> +	if (is_long_mode(vcpu) &&
> +	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
> +		return 1;

62 is a little better, since 63 is the PCID invalidate bit.

Paolo

> +	else if (is_pae(vcpu) && is_paging(vcpu) &&
>  		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
>  		return 1;
>
Yu Zhang Aug. 14, 2017, 11:39 a.m. UTC | #2
On 8/14/2017 3:36 PM, Paolo Bonzini wrote:
> On 12/08/2017 15:35, Yu Zhang wrote:
>> +			ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, NULL);
>> +			maxphyaddr = eax * 0xff;
> This is "&", not "*".  You can also use rsvd_bits here.

Oops. Sorry for the typo. :-)

>> +			rsvd = (~((1UL << maxphyaddr) - 1)) &
>> +				~CR3_PCID_INVD;
>> +		}
>>   
>>   		if (new_val & rsvd)
>>   			return emulate_gp(ctxt, 0);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index e40a779..d9100c4 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -813,10 +813,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>>   		return 0;
>>   	}
>>   
>> -	if (is_long_mode(vcpu)) {
>> -		if (cr3 & CR3_L_MODE_RESERVED_BITS)
>> -			return 1;
>> -	} else if (is_pae(vcpu) && is_paging(vcpu) &&
>> +	if (is_long_mode(vcpu) &&
>> +	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
>> +		return 1;
> 62 is a little better, since 63 is the PCID invalidate bit.

Got it. Both kvm_set_cr3() and check_cr_write() should use 
rsvd_bits(maxphyaddr, 62) .

Thanks
Yu

> Paolo
>
>> +	else if (is_pae(vcpu) && is_paging(vcpu) &&
>>   		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
>>   		return 1;
>>   
>
Jim Mattson Aug. 14, 2017, 4:13 p.m. UTC | #3
On Sat, Aug 12, 2017 at 6:35 AM, Yu Zhang <yu.c.zhang@linux.intel.com> wrote:
> Currently, KVM uses CR3_L_MODE_RESERVED_BITS to check the
> reserved bits in CR3. Yet the length of reserved bits in
> guest CR3 should be based on the physical address width
> exposed to the VM. This patch changes CR3 check logic to
> calculate the reserved bits at runtime.
>
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 -
>  arch/x86/kvm/emulate.c          | 12 ++++++++++--
>  arch/x86/kvm/x86.c              |  8 ++++----
>  3 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9e4862e..018300e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -79,7 +79,6 @@
>                           | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
>                           | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
>
> -#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
>  #define CR3_PCID_INVD           BIT_64(63)
>  #define CR4_RESERVED_BITS                                               \
>         (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index fb00559..a98b88a 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -4097,8 +4097,16 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
>                 u64 rsvd = 0;
>
>                 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
> -               if (efer & EFER_LMA)
> -                       rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
> +               if (efer & EFER_LMA) {
> +                       u64 maxphyaddr;
> +                       u32 eax = 0x80000008;
> +
> +                       ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, NULL);
> +                       maxphyaddr = eax * 0xff;

What if leaf 0x80000008 is not defined?

> +
> +                       rsvd = (~((1UL << maxphyaddr) - 1)) &
> +                               ~CR3_PCID_INVD;
> +               }
>
>                 if (new_val & rsvd)
>                         return emulate_gp(ctxt, 0);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e40a779..d9100c4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -813,10 +813,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>                 return 0;
>         }
>
> -       if (is_long_mode(vcpu)) {
> -               if (cr3 & CR3_L_MODE_RESERVED_BITS)
> -                       return 1;
> -       } else if (is_pae(vcpu) && is_paging(vcpu) &&
> +       if (is_long_mode(vcpu) &&
> +           (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
> +               return 1;
> +       else if (is_pae(vcpu) && is_paging(vcpu) &&
>                    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
>                 return 1;
>
> --
> 2.5.0
>
Paolo Bonzini Aug. 14, 2017, 4:40 p.m. UTC | #4
On 14/08/2017 18:13, Jim Mattson wrote:
>>                 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
>> -               if (efer & EFER_LMA)
>> -                       rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
>> +               if (efer & EFER_LMA) {
>> +                       u64 maxphyaddr;
>> +                       u32 eax = 0x80000008;
>> +
>> +                       ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, NULL);
>> +                       maxphyaddr = eax * 0xff;
>
> What if leaf 0x80000008 is not defined?

I noticed this too, but I thought it was mitigated by being under
EFER_LMA.  Unfortunately, kvm_set_efer doesn't check
guest_cpuid_has_longmode, so I guess you do have to test leaf 0x80000000
first.  Alternatively:

1) kvm_cpuid could return false if it's falling back to
check_cpuid_limit, and emulator_get_cpuid can then be changed to return bool

2) kvm_cpuid and emulator_get_cpuid could gain a new argument to disable
the check_cpuid_limit fallback.

Yu, would you like to implement the latter?

Paolo
Yu Zhang Aug. 15, 2017, 7:50 a.m. UTC | #5
On 8/15/2017 12:40 AM, Paolo Bonzini wrote:
> On 14/08/2017 18:13, Jim Mattson wrote:
>>>                  ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
>>> -               if (efer & EFER_LMA)
>>> -                       rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
>>> +               if (efer & EFER_LMA) {
>>> +                       u64 maxphyaddr;
>>> +                       u32 eax = 0x80000008;
>>> +
>>> +                       ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, NULL);
>>> +                       maxphyaddr = eax * 0xff;
>> What if leaf 0x80000008 is not defined?
> I noticed this too, but I thought it was mitigated by being under
> EFER_LMA.  Unfortunately, kvm_set_efer doesn't check
> guest_cpuid_has_longmode, so I guess you do have to test leaf 0x80000000
> first.  Alternatively:
>
> 1) kvm_cpuid could return false if it's falling back to
> check_cpuid_limit, and emulator_get_cpuid can then be changed to return bool
>
> 2) kvm_cpuid and emulator_get_cpuid could gain a new argument to disable
> the check_cpuid_limit fallback.
>
> Yu, would you like to implement the latter?

Thanks for pointing this out, Jim & Paolo. The latter choice sounds 
better to me. :-)
I'd like to implement this in a separate patch in next version patch set.

Yu
> Paolo
>
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e4862e..018300e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,7 +79,6 @@ 
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
-#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
 #define CR3_PCID_INVD		 BIT_64(63)
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fb00559..a98b88a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4097,8 +4097,16 @@  static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 		u64 rsvd = 0;
 
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-		if (efer & EFER_LMA)
-			rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
+		if (efer & EFER_LMA) {
+			u64 maxphyaddr;
+			u32 eax = 0x80000008;
+
+			ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, NULL);
+			maxphyaddr = eax * 0xff;
+
+			rsvd = (~((1UL << maxphyaddr) - 1)) &
+				~CR3_PCID_INVD;
+		}
 
 		if (new_val & rsvd)
 			return emulate_gp(ctxt, 0);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e40a779..d9100c4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -813,10 +813,10 @@  int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 0;
 	}
 
-	if (is_long_mode(vcpu)) {
-		if (cr3 & CR3_L_MODE_RESERVED_BITS)
-			return 1;
-	} else if (is_pae(vcpu) && is_paging(vcpu) &&
+	if (is_long_mode(vcpu) &&
+	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
+		return 1;
+	else if (is_pae(vcpu) && is_paging(vcpu) &&
 		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;