diff mbox

[v1] KVM: VMX: enable guest access to LMCE related MSRs

Message ID 20160603060851.17018-2-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang June 3, 2016, 6:08 a.m. UTC
On Intel platforms, this patch adds LMCE to KVM MCE supported
capabilities and handles guest access to LMCE related MSRs.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/vmx.c              | 27 +++++++++++++++++++++++++--
 arch/x86/kvm/x86.c              | 12 ++++++------
 3 files changed, 34 insertions(+), 8 deletions(-)

Comments

Radim Krčmář June 3, 2016, 3:34 p.m. UTC | #1
2016-06-03 14:08+0800, Haozhong Zhang:
> On Intel platforms, this patch adds LMCE to KVM MCE supported
> capabilities and handles guest access to LMCE related MSRs.
> 
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -2863,6 +2863,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
>  	return 0;
>  }
>  
> +static inline bool vmx_feature_control_msr_required(struct kvm_vcpu *vcpu)

I'd call it "present", rather than "required".  SDM does so for other
MSRs and it is easier to understand in the condition that returns #GP if
this function is false.

> +{
> +	return nested_vmx_allowed(vcpu) || (vcpu->arch.mcg_cap & MCG_LMCE_P);
> +}
> @@ -2904,8 +2909,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_IA32_FEATURE_CONTROL:
> -		if (!nested_vmx_allowed(vcpu))
> +		if (!vmx_feature_control_msr_required(vcpu))
>  			return 1;
>  		msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;

(MSR_IA32_FEATURE_CONTROL does not depend only on nested anymore, so
 moving msr_ia32_feature_control from struct nested_vmx to struct
 vcpu_vmx would make sense.)

>  		break;
> @@ -2997,8 +3009,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +	case MSR_IA32_MCG_EXT_CTL:
> +		if (!(vcpu->arch.mcg_cap & MCG_LMCE_P) ||
> +		    !(to_vmx(vcpu)->nested.msr_ia32_feature_control &
> +		      FEATURE_CONTROL_LMCE))

(This check is used twice and could be given a name too,
 "vmx_mcg_ext_ctl_msr_present()"?)

> +			return 1;
> +		if (data && data != 0x1)

  (data & ~MCG_EXT_CTL_LMCE_EN)

is a clearer check for reserved bits.

> +			return -1;
> +		vcpu->arch.mcg_ext_ctl = data;
> +		break;
>  	case MSR_IA32_FEATURE_CONTROL:
> -		if (!nested_vmx_allowed(vcpu) ||
> +		if (!vmx_feature_control_msr_required(vcpu) ||
>  		    (to_vmx(vcpu)->nested.msr_ia32_feature_control &
>  		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
>  			return 1;

Does hardware throw #GP when FEATURE_CONTROL_LMCE is set without
MCG_LMCE_P?

(We could emulate that by having a mask of valid bits and also use that
 mask in place of vmx_feature_control_msr_required().  I don't think
 there is a reason to have only vmx_feature_control_msr_required() if
 the hardware can #GP on individual bits too.)

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Borislav Petkov June 4, 2016, 11:01 a.m. UTC | #2
Haozhong Zhang <haozhong.zhang@intel.com> wrote:

>On Intel platforms, this patch adds LMCE to KVM MCE supported
>capabilities and handles guest access to LMCE related MSRs.
>
>Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>

SOB chain needs correction wrt who the author is and who the submitter.
Haozhong Zhang June 5, 2016, 3:09 p.m. UTC | #3
On 06/03/16 17:34, Radim Krčmář wrote:
> 2016-06-03 14:08+0800, Haozhong Zhang:
> > On Intel platforms, this patch adds LMCE to KVM MCE supported
> > capabilities and handles guest access to LMCE related MSRs.
> > 
> > Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > ---
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > @@ -2863,6 +2863,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
> >  	return 0;
> >  }
> >  
> > +static inline bool vmx_feature_control_msr_required(struct kvm_vcpu *vcpu)
> 
> I'd call it "present", rather than "required".  SDM does so for other
> MSRs and it is easier to understand in the condition that returns #GP if
> this function is false.
>

Agree, I'll change.

> > +{
> > +	return nested_vmx_allowed(vcpu) || (vcpu->arch.mcg_cap & MCG_LMCE_P);
> > +}
> > @@ -2904,8 +2909,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	case MSR_IA32_FEATURE_CONTROL:
> > -		if (!nested_vmx_allowed(vcpu))
> > +		if (!vmx_feature_control_msr_required(vcpu))
> >  			return 1;
> >  		msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
> 
> (MSR_IA32_FEATURE_CONTROL does not depend only on nested anymore, so
>  moving msr_ia32_feature_control from struct nested_vmx to struct
>  vcpu_vmx would make sense.)
>

will move in the next version

> >  		break;
> > @@ -2997,8 +3009,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > +	case MSR_IA32_MCG_EXT_CTL:
> > +		if (!(vcpu->arch.mcg_cap & MCG_LMCE_P) ||
> > +		    !(to_vmx(vcpu)->nested.msr_ia32_feature_control &
> > +		      FEATURE_CONTROL_LMCE))
> 
> (This check is used twice and could be given a name too,
>  "vmx_mcg_ext_ctl_msr_present()"?)
>

will change

> > +			return 1;
> > +		if (data && data != 0x1)
> 
>   (data & ~MCG_EXT_CTL_LMCE_EN)
> 
> is a clearer check for reserved bits.
>

ditto

> > +			return -1;
> > +		vcpu->arch.mcg_ext_ctl = data;
> > +		break;
> >  	case MSR_IA32_FEATURE_CONTROL:
> > -		if (!nested_vmx_allowed(vcpu) ||
> > +		if (!vmx_feature_control_msr_required(vcpu) ||
> >  		    (to_vmx(vcpu)->nested.msr_ia32_feature_control &
> >  		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
> >  			return 1;
> 
> Does hardware throw #GP when FEATURE_CONTROL_LMCE is set without
> MCG_LMCE_P?
>

Yes, SDM vol 2 says setting reserved bits of MSR causes #GP.

> (We could emulate that by having a mask of valid bits and also use that
>  mask in place of vmx_feature_control_msr_required().  I don't think
>  there is a reason to have only vmx_feature_control_msr_required() if
>  the hardware can #GP on individual bits too.)
>

Oh yes, I should also validate the individual bits. I'll add it in the
next version.

Thanks,
Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haozhong Zhang June 5, 2016, 3:14 p.m. UTC | #4
On 06/04/16 13:01, Boris Petkov wrote:
> Haozhong Zhang <haozhong.zhang@intel.com> wrote:
> 
> >On Intel platforms, this patch adds LMCE to KVM MCE supported
> >capabilities and handles guest access to LMCE related MSRs.
> >
> >Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> >Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> 
> SOB chain needs correction wrt who the author is and who the submitter. 
> 

Ashok was also involved in the development of v1 patch and it's based
on his v0 patch, so I think I should take his SOB?

Thanks,
Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Borislav Petkov June 5, 2016, 7:43 p.m. UTC | #5
On Sun, Jun 05, 2016 at 11:14:56PM +0800, Haozhong Zhang wrote:
> Ashok was also involved in the development of v1 patch and it's based
> on his v0 patch, so I think I should take his SOB?

You have at least three options:

1.
From: Author Name <author.name@example.com>

...

Signed-off-by: Author Name <author.name@example.com>
[ Submitter did this and that to the Author's patch ]
Signed-off-by: Submitter Name <submitter.name@example.com>

So you either do that or you say something like

2. "Based on an original patch by Ashok..."

in the commit message

or

you add the tag

3. Originally-by: Ashok..

Makes sense?

For more info see Documentation/SubmittingPatches.
Haozhong Zhang June 6, 2016, 12:12 a.m. UTC | #6
On 06/05/16 21:43, Borislav Petkov wrote:
> On Sun, Jun 05, 2016 at 11:14:56PM +0800, Haozhong Zhang wrote:
> > Ashok was also involved in the development of v1 patch and it's based
> > on his v0 patch, so I think I should take his SOB?
> 
> You have at least three options:
> 
> 1.
> From: Author Name <author.name@example.com>
> 
> ...
> 
> Signed-off-by: Author Name <author.name@example.com>
> [ Submitter did this and that to the Author's patch ]
> Signed-off-by: Submitter Name <submitter.name@example.com>
>

Thanks! I'll take this option.

Haozhong

> So you either do that or you say something like
>
> 2. "Based on an original patch by Ashok..."
> 
> in the commit message
> 
> or
> 
> you add the tag
> 
> 3. Originally-by: Ashok..
> 
> Makes sense?
> 
> For more info see Documentation/SubmittingPatches.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
> -- 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0fbe7e..89509f9d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -598,6 +598,7 @@  struct kvm_vcpu_arch {
 	u64 mcg_cap;
 	u64 mcg_status;
 	u64 mcg_ctl;
+	u64 mcg_ext_ctl;
 	u64 *mce_banks;
 
 	/* Cache MMIO info */
@@ -1077,6 +1078,8 @@  extern u8   kvm_tsc_scaling_ratio_frac_bits;
 /* maximum allowed value of TSC scaling ratio */
 extern u64  kvm_max_tsc_scaling_ratio;
 
+extern u64 kvm_mce_cap_supported;
+
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fb93010..42c3ee1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2863,6 +2863,11 @@  static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	return 0;
 }
 
+static inline bool vmx_feature_control_msr_required(struct kvm_vcpu *vcpu)
+{
+	return nested_vmx_allowed(vcpu) || (vcpu->arch.mcg_cap & MCG_LMCE_P);
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -2904,8 +2909,15 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		msr_info->data = vmcs_read64(GUEST_BNDCFGS);
 		break;
+	case MSR_IA32_MCG_EXT_CTL:
+		if (!(vcpu->arch.mcg_cap & MCG_LMCE_P) ||
+		    !(to_vmx(vcpu)->nested.msr_ia32_feature_control &
+		      FEATURE_CONTROL_LMCE))
+			return 1;
+		msr_info->data = vcpu->arch.mcg_ext_ctl;
+		break;
 	case MSR_IA32_FEATURE_CONTROL:
-		if (!nested_vmx_allowed(vcpu))
+		if (!vmx_feature_control_msr_required(vcpu))
 			return 1;
 		msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
 		break;
@@ -2997,8 +3009,17 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC_ADJUST:
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
+	case MSR_IA32_MCG_EXT_CTL:
+		if (!(vcpu->arch.mcg_cap & MCG_LMCE_P) ||
+		    !(to_vmx(vcpu)->nested.msr_ia32_feature_control &
+		      FEATURE_CONTROL_LMCE))
+			return 1;
+		if (data && data != 0x1)
+			return -1;
+		vcpu->arch.mcg_ext_ctl = data;
+		break;
 	case MSR_IA32_FEATURE_CONTROL:
-		if (!nested_vmx_allowed(vcpu) ||
+		if (!vmx_feature_control_msr_required(vcpu) ||
 		    (to_vmx(vcpu)->nested.msr_ia32_feature_control &
 		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
 			return 1;
@@ -6391,6 +6412,8 @@  static __init int hardware_setup(void)
 
 	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
 
+	kvm_mce_cap_supported |= MCG_LMCE_P;
+
 	return alloc_kvm_area();
 
 out8:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 902d9da..0cc8f00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -70,7 +70,8 @@ 
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
+EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
 
 #define emul_to_vcpu(ctxt) \
 	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
@@ -982,6 +983,7 @@  static u32 emulated_msrs[] = {
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
 	MSR_IA32_MCG_CTL,
+	MSR_IA32_MCG_EXT_CTL,
 	MSR_IA32_SMBASE,
 };
 
@@ -2683,11 +2685,9 @@  long kvm_arch_dev_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
-		u64 mce_cap;
-
-		mce_cap = KVM_MCE_CAP_SUPPORTED;
 		r = -EFAULT;
-		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+		if (copy_to_user(argp, &kvm_mce_cap_supported,
+				 sizeof(kvm_mce_cap_supported)))
 			goto out;
 		r = 0;
 		break;
@@ -2865,7 +2865,7 @@  static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
 	r = -EINVAL;
 	if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
 		goto out;
-	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+	if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
 		goto out;
 	r = 0;
 	vcpu->arch.mcg_cap = mcg_cap;