[8/9] KVM: MMU: fix MTRR update

Message ID 1430389490-24602-9-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Xiao Guangrong April 30, 2015, 10:24 a.m. UTC
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Currently, whenever guest MTRR registers are changed, kvm_mmu_reset_context
is called to switch to a new root shadow page table. However, this is
useless, since:

1) the cache type is not cached in the shadow page's attributes, so the
   original root shadow page will be reused

2) the cache type is set on the last-level sptes, which means the
   last-level sptes should be synced when the MTRRs change

This patch fixes the issue by dropping all the sptes in the gfn range
being updated by the MTRR

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm.c              |  5 ++++
 arch/x86/kvm/vmx.c              | 58 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |  2 +-
 4 files changed, 66 insertions(+), 1 deletion(-)

Comments

David Matlack May 6, 2015, 9:36 p.m. UTC | #1
On Thu, Apr 30, 2015 at 3:24 AM,  <guangrong.xiao@linux.intel.com> wrote:
> From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>
> Currently, whenever guest MTRR registers are changed, kvm_mmu_reset_context
> is called to switch to a new root shadow page table. However, this is
> useless, since:
>
> 1) the cache type is not cached in the shadow page's attributes, so the
>    original root shadow page will be reused
>
> 2) the cache type is set on the last-level sptes, which means the
>    last-level sptes should be synced when the MTRRs change
>
> This patch fixes the issue by dropping all the sptes in the gfn range
> being updated by the MTRR
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/svm.c              |  5 ++++
>  arch/x86/kvm/vmx.c              | 58 +++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              |  2 +-
>  4 files changed, 66 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index dea2e7e..ae9528d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -778,6 +778,8 @@ struct kvm_x86_ops {
>         int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>         int (*get_tdp_level)(void);
>         u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> +       void (*set_msr_mtrr)(struct kvm_vcpu *vcpu, u32 msr);
> +
>         int (*get_lpage_level)(void);
>         bool (*rdtscp_supported)(void);
>         bool (*invpcid_supported)(void);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index ce741b8..c33573c 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4078,6 +4078,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>         return 0;
>  }
>
> +static void svm_set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +}
> +
>  static void svm_cpuid_update(struct kvm_vcpu *vcpu)
>  {
>  }
> @@ -4410,6 +4414,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>         .set_tss_addr = svm_set_tss_addr,
>         .get_tdp_level = get_npt_level,
>         .get_mt_mask = svm_get_mt_mask,
> +       .set_msr_mtrr = svm_set_msr_mtrr,
>
>         .get_exit_info = svm_get_exit_info,
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index f7b6168..fcd0001 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8505,6 +8505,63 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>         return ret;
>  }
>
> +static void vmx_set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +       struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
> +       unsigned char mtrr_enabled = mtrr_state->enabled;
> +       gfn_t start, end, mask;
> +       int index;
> +       bool is_fixed = true;
> +
> +       if (msr == MSR_IA32_CR_PAT || !enable_ept ||
> +             !kvm_arch_has_noncoherent_dma(vcpu->kvm))
> +               return;
> +
> +       if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
> +               return;
> +
> +       switch (msr) {
> +       case MSR_MTRRfix64K_00000:
> +               start = 0x0;
> +               end = 0x80000;
> +               break;
> +       case MSR_MTRRfix16K_80000:
> +               start = 0x80000;
> +               end = 0xa0000;
> +               break;
> +       case MSR_MTRRfix16K_A0000:
> +               start = 0xa0000;
> +               end = 0xc0000;
> +               break;
> +       case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
> +               index = msr - MSR_MTRRfix4K_C0000;
> +               start = 0xc0000 + index * (32 << 10);
> +               end = start + (32 << 10);
> +               break;
> +       case MSR_MTRRdefType:
> +               is_fixed = false;
> +               start = 0x0;
> +               end = ~0ULL;
> +               break;
> +       default:
> +               /* variable range MTRRs. */
> +               is_fixed = false;
> +               index = (msr - 0x200) / 2;
> +               start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
> +                      (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
> +               mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
> +                      (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
> +               mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
> +
> +               end = ((start & mask) | ~mask) + 1;
> +       }
> +
> +       if (is_fixed && !(mtrr_enabled & 0x1))
> +               return;

For variable range MTRRs, I think you want to break out here if the valid flag
(bit 11 of the mask MTRR) is not set.
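
For concreteness, the suggested early-out might look like this (an editorial
sketch, not code from the patch; it assumes the V flag is bit 11 of the mask
register, and reuses the field names the patch already uses):

	/* Sketch: skip the zap when the variable range's V (valid) flag,
	 * bit 11 of the mask MTRR, is clear. */
	if (!is_fixed && !(mtrr_state->var_ranges[index].mask_lo & (1 << 11)))
		return;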

> +
> +       kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
> +}
> +
>  static int vmx_get_lpage_level(void)
>  {
>         if (enable_ept && !cpu_has_vmx_ept_1g_page())
> @@ -10218,6 +10275,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>         .set_tss_addr = vmx_set_tss_addr,
>         .get_tdp_level = get_ept_level,
>         .get_mt_mask = vmx_get_mt_mask,
> +       .set_msr_mtrr = vmx_set_msr_mtrr,
>
>         .get_exit_info = vmx_get_exit_info,
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c73efcd..579c205 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1887,7 +1887,7 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>                 *pt = data;
>         }
>
> -       kvm_mmu_reset_context(vcpu);
> +       kvm_x86_ops->set_msr_mtrr(vcpu, msr);
>         return 0;
>  }
>
> --
> 1.9.3
>
Xiao Guangrong May 7, 2015, 1:57 a.m. UTC | #2
Hi David,

Thanks for your review.

On 05/07/2015 05:36 AM, David Matlack wrote:

>> +static void vmx_set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>> +{
>> +       struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
>> +       unsigned char mtrr_enabled = mtrr_state->enabled;
>> +       gfn_t start, end, mask;
>> +       int index;
>> +       bool is_fixed = true;
>> +
>> +       if (msr == MSR_IA32_CR_PAT || !enable_ept ||
>> +             !kvm_arch_has_noncoherent_dma(vcpu->kvm))
>> +               return;
>> +
>> +       if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
>> +               return;
>> +
>> +       switch (msr) {
>> +       case MSR_MTRRfix64K_00000:
>> +               start = 0x0;
>> +               end = 0x80000;
>> +               break;
>> +       case MSR_MTRRfix16K_80000:
>> +               start = 0x80000;
>> +               end = 0xa0000;
>> +               break;
>> +       case MSR_MTRRfix16K_A0000:
>> +               start = 0xa0000;
>> +               end = 0xc0000;
>> +               break;
>> +       case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
>> +               index = msr - MSR_MTRRfix4K_C0000;
>> +               start = 0xc0000 + index * (32 << 10);
>> +               end = start + (32 << 10);
>> +               break;
>> +       case MSR_MTRRdefType:
>> +               is_fixed = false;
>> +               start = 0x0;
>> +               end = ~0ULL;
>> +               break;
>> +       default:
>> +               /* variable range MTRRs. */
>> +               is_fixed = false;
>> +               index = (msr - 0x200) / 2;
>> +               start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
>> +                      (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
>> +               mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
>> +                      (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
>> +               mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
>> +
>> +               end = ((start & mask) | ~mask) + 1;
>> +       }
>> +
>> +       if (is_fixed && !(mtrr_enabled & 0x1))
>> +               return;
>
> For variable range MTRRs, I think you want to break out here if the valid flag
> (bit 11 of the mask MTRR) is not set.

We should update these ranges whenever the valid bit changes. If we see the
valid bit is zero here, the guest is disabling the MTRR for that range, so
we need to drop the cache type we previously set for that range.
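
To illustrate (an editorial sketch only; old_mask_lo and new_mask_lo are
hypothetical locals standing for the pre- and post-write mask values, not
variables in the patch):

	/* Both transitions of the V flag matter: enabling installs a new
	 * cache type, and disabling must drop the one previously set, so
	 * only a clear -> clear write could safely skip the zap. */
	bool was_valid = old_mask_lo & (1 << 11);
	bool now_valid = new_mask_lo & (1 << 11);

	if (was_valid || now_valid)
		kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));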

Paolo Bonzini May 7, 2015, 4:53 p.m. UTC | #3
On 30/04/2015 12:24, guangrong.xiao@linux.intel.com wrote:
> +static void vmx_set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
> +	unsigned char mtrr_enabled = mtrr_state->enabled;
> +	gfn_t start, end, mask;
> +	int index;
> +	bool is_fixed = true;
> +
> +	if (msr == MSR_IA32_CR_PAT || !enable_ept ||
> +	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
> +		return;
> +
> +	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
> +		return;
> +
> +	switch (msr) {
> +	case MSR_MTRRfix64K_00000:
> +		start = 0x0;
> +		end = 0x80000;
> +		break;
> +	case MSR_MTRRfix16K_80000:
> +		start = 0x80000;
> +		end = 0xa0000;
> +		break;
> +	case MSR_MTRRfix16K_A0000:
> +		start = 0xa0000;
> +		end = 0xc0000;
> +		break;
> +	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
> +		index = msr - MSR_MTRRfix4K_C0000;
> +		start = 0xc0000 + index * (32 << 10);
> +		end = start + (32 << 10);
> +		break;
> +	case MSR_MTRRdefType:
> +		is_fixed = false;
> +		start = 0x0;
> +		end = ~0ULL;
> +		break;
> +	default:
> +		/* variable range MTRRs. */
> +		is_fixed = false;
> +		index = (msr - 0x200) / 2;
> +		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
> +		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
> +		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
> +		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
> +		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
> +
> +		end = ((start & mask) | ~mask) + 1;
> +	}
> +
> +	if (is_fixed && !(mtrr_enabled & 0x1))
> +		return;
> +
> +	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
> +}

I think this should all be generic logic, even if it causes some extra
zaps on AMD.  (It's AMD's bug that it doesn't honor MTRRs).

Even !enable_ept can be handled in a vendor-independent manner, as
"vcpu->arch.mmu.page_fault == tdp_page_fault".

Paolo
Xiao Guangrong May 11, 2015, 1:02 p.m. UTC | #4
On 05/08/2015 12:53 AM, Paolo Bonzini wrote:
>
>
> On 30/04/2015 12:24, guangrong.xiao@linux.intel.com wrote:
>> [... vmx_set_msr_mtrr() quoted in full in #3 above; trimmed here ...]
>
> I think this should all be generic logic, even if it causes some extra
> zaps on AMD.  (It's AMD's bug that it doesn't honor MTRRs).

Okay, will move the function to x86.c and kill the callback in x86_ops.

>
> Even !enable_ept can be handled in a vendor-independent manner, as
> "vcpu->arch.mmu.page_fault == tdp_page_fault".

We can directly use 'tdp_enabled'; it has already been extern-ed. :)
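
For reference, a hedged sketch of the generic version being discussed (the
name update_mtrr and its placement in x86.c are assumptions about the
follow-up, not part of this patch):

	/* Sketch: a vendor-independent guard keyed on the extern-ed
	 * tdp_enabled instead of the VMX-only enable_ept. */
	static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
	{
		if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
		      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
			return;

		/* ... same fixed/variable-range handling as
		 * vmx_set_msr_mtrr(), ending in
		 * kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start),
		 * gpa_to_gfn(end)); ... */
	}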

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e..ae9528d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -778,6 +778,8 @@  struct kvm_x86_ops {
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+	void (*set_msr_mtrr)(struct kvm_vcpu *vcpu, u32 msr);
+
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
 	bool (*invpcid_supported)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8..c33573c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4078,6 +4078,10 @@  static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	return 0;
 }
 
+static void svm_set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr)
+{
+}
+
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
@@ -4410,6 +4414,7 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
 	.get_mt_mask = svm_get_mt_mask,
+	.set_msr_mtrr = svm_set_msr_mtrr,
 
 	.get_exit_info = svm_get_exit_info,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b6168..fcd0001 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8505,6 +8505,63 @@  static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	return ret;
 }
 
+static void vmx_set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
+	unsigned char mtrr_enabled = mtrr_state->enabled;
+	gfn_t start, end, mask;
+	int index;
+	bool is_fixed = true;
+
+	if (msr == MSR_IA32_CR_PAT || !enable_ept ||
+	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+		return;
+
+	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
+		return;
+
+	switch (msr) {
+	case MSR_MTRRfix64K_00000:
+		start = 0x0;
+		end = 0x80000;
+		break;
+	case MSR_MTRRfix16K_80000:
+		start = 0x80000;
+		end = 0xa0000;
+		break;
+	case MSR_MTRRfix16K_A0000:
+		start = 0xa0000;
+		end = 0xc0000;
+		break;
+	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+		index = msr - MSR_MTRRfix4K_C0000;
+		start = 0xc0000 + index * (32 << 10);
+		end = start + (32 << 10);
+		break;
+	case MSR_MTRRdefType:
+		is_fixed = false;
+		start = 0x0;
+		end = ~0ULL;
+		break;
+	default:
+		/* variable range MTRRs. */
+		is_fixed = false;
+		index = (msr - 0x200) / 2;
+		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
+		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
+		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
+
+		end = ((start & mask) | ~mask) + 1;
+	}
+
+	if (is_fixed && !(mtrr_enabled & 0x1))
+		return;
+
+	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
+}
+
 static int vmx_get_lpage_level(void)
 {
 	if (enable_ept && !cpu_has_vmx_ept_1g_page())
@@ -10218,6 +10275,7 @@  static struct kvm_x86_ops vmx_x86_ops = {
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask = vmx_get_mt_mask,
+	.set_msr_mtrr = vmx_set_msr_mtrr,
 
 	.get_exit_info = vmx_get_exit_info,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c73efcd..579c205 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1887,7 +1887,7 @@  static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		*pt = data;
 	}
 
-	kvm_mmu_reset_context(vcpu);
+	kvm_x86_ops->set_msr_mtrr(vcpu, msr);
 	return 0;
 }
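
A footnote on the variable-range arithmetic above: a standalone worked
example (user-space C, not kernel code; the 36-bit physical address width
and the 256 MiB range are assumptions for illustration) of how base and
mask yield the [start, end) range via end = ((start & mask) | ~mask) + 1:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int maxphyaddr = 36;            /* assumed MAXPHYADDR      */
		uint64_t base = 0x80000000ULL;  /* range base: 2 GiB       */
		uint64_t mask = 0xFF0000000ULL; /* covers a 256 MiB range  */

		/* treat bits above maxphyaddr as set, as the patch does */
		mask |= ~0ULL << maxphyaddr;

		uint64_t start = base;
		uint64_t end = ((start & mask) | ~mask) + 1;

		/* prints: range [0x80000000, 0x90000000), i.e. 256 MiB */
		printf("range [0x%llx, 0x%llx)\n",
		       (unsigned long long)start, (unsigned long long)end);
		return 0;
	}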