diff mbox series

[2/4] KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE

Message ID 20201218003139.2167891-3-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Bug fixes and cleanups in get_mmio_spte() | expand

Commit Message

Sean Christopherson Dec. 18, 2020, 12:31 a.m. UTC
Get the so called "root" level from the low level shadow page table
walkers instead of manually attempting to calculate it higher up the
stack, e.g. in get_mmio_spte().  When KVM is using PAE shadow paging,
the starting level of the walk, from the callers perspective, is not
the CR3 root but rather the PDPTR "root".  Checking for reserved bits
from the CR3 root causes get_mmio_spte() to consume uninitialized stack
data due to indexing into sptes[] for a level that was not filled by
get_walk().  This can result in false positives and/or negatives
depending on what garbage happens to be on the stack.

Opportunistically nuke a few extra newlines.

Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Reported-by: Richard Herbert <rherbert@sympatico.ca>
Cc: Ben Gardon <bgardon@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++---------
 arch/x86/kvm/mmu/tdp_mmu.c |  5 ++++-
 arch/x86/kvm/mmu/tdp_mmu.h |  4 +++-
 3 files changed, 13 insertions(+), 11 deletions(-)

Comments

Vitaly Kuznetsov Dec. 18, 2020, 9:10 a.m. UTC | #1
Sean Christopherson <seanjc@google.com> writes:

> Get the so called "root" level from the low level shadow page table
> walkers instead of manually attempting to calculate it higher up the
> stack, e.g. in get_mmio_spte().  When KVM is using PAE shadow paging,
> the starting level of the walk, from the callers perspective, is not
> the CR3 root but rather the PDPTR "root".  Checking for reserved bits
> from the CR3 root causes get_mmio_spte() to consume uninitialized stack
> data due to indexing into sptes[] for a level that was not filled by
> get_walk().  This can result in false positives and/or negatives
> depending on what garbage happens to be on the stack.
>
> Opportunistically nuke a few extra newlines.
>
> Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
> Reported-by: Richard Herbert <rherbert@sympatico.ca>
> Cc: Ben Gardon <bgardon@google.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c     | 15 ++++++---------
>  arch/x86/kvm/mmu/tdp_mmu.c |  5 ++++-
>  arch/x86/kvm/mmu/tdp_mmu.h |  4 +++-
>  3 files changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a48cd12c01d7..52f36c879086 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
>   * Return the level of the lowest level SPTE added to sptes.
>   * That SPTE may be non-present.
>   */
> -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
> +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
>  {
>  	struct kvm_shadow_walk_iterator iterator;
>  	int leaf = -1;
>  	u64 spte;
>  
> -
>  	walk_shadow_page_lockless_begin(vcpu);
>  
> -	for (shadow_walk_init(&iterator, vcpu, addr);
> +	for (shadow_walk_init(&iterator, vcpu, addr),
> +	     *root_level = iterator.level;
>  	     shadow_walk_okay(&iterator);
>  	     __shadow_walk_next(&iterator, spte)) {
>  		leaf = iterator.level;
> @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
>  
>  		if (!is_shadow_present_pte(spte))
>  			break;
> -
>  	}
>  
>  	walk_shadow_page_lockless_end(vcpu);
> @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
>  {
>  	u64 sptes[PT64_ROOT_MAX_LEVEL];
>  	struct rsvd_bits_validate *rsvd_check;
> -	int root = vcpu->arch.mmu->shadow_root_level;
> -	int leaf;
> -	int level;
> +	int root, leaf, level;
>  	bool reserved = false;

Personal taste: I would've renamed 'root' to 'root_level' (to be
consistent with get_walk()/kvm_tdp_mmu_get_walk()) and 'level' to
e.g. 'l' as it's only being used as an interator ('i' would also do).

>  
>  	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
> @@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
>  	}
>  
>  	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
> -		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
> +		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
>  	else
> -		leaf = get_walk(vcpu, addr, sptes);
> +		leaf = get_walk(vcpu, addr, sptes, &root);
>  
>  	if (unlikely(leaf < 0)) {
>  		*sptep = 0ull;
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 50cec7a15ddb..a4f9447f8327 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>   * Return the level of the lowest level SPTE added to sptes.
>   * That SPTE may be non-present.
>   */
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
> +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> +			 int *root_level)
>  {
>  	struct tdp_iter iter;
>  	struct kvm_mmu *mmu = vcpu->arch.mmu;
>  	gfn_t gfn = addr >> PAGE_SHIFT;
>  	int leaf = -1;
>  
> +	*root_level = vcpu->arch.mmu->shadow_root_level;
> +
>  	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
>  		leaf = iter.level;
>  		sptes[leaf - 1] = iter.old_spte;
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> index 556e065503f6..cbbdbadd1526 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.h
> +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
>  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>  				   struct kvm_memory_slot *slot, gfn_t gfn);
>  
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
> +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> +			 int *root_level);
> +
>  #endif /* __KVM_X86_MMU_TDP_MMU_H */

Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Paolo Bonzini Dec. 21, 2020, 6:24 p.m. UTC | #2
On 18/12/20 10:10, Vitaly Kuznetsov wrote:
>> -	int root = vcpu->arch.mmu->shadow_root_level;
>> -	int leaf;
>> -	int level;
>> +	int root, leaf, level;
>>   	bool reserved = false;
> Personal taste: I would've renamed 'root' to 'root_level' (to be
> consistent with get_walk()/kvm_tdp_mmu_get_walk()) and 'level' to
> e.g. 'l' as it's only being used as an interator ('i' would also do).

Maybe agree on the former, not really on the latter. :)

Paolo
Sean Christopherson Dec. 21, 2020, 6:30 p.m. UTC | #3
On Mon, Dec 21, 2020, Paolo Bonzini wrote:
> On 18/12/20 10:10, Vitaly Kuznetsov wrote:
> > > -	int root = vcpu->arch.mmu->shadow_root_level;
> > > -	int leaf;
> > > -	int level;
> > > +	int root, leaf, level;
> > >   	bool reserved = false;
> > Personal taste: I would've renamed 'root' to 'root_level' (to be
> > consistent with get_walk()/kvm_tdp_mmu_get_walk()) and 'level' to
> > e.g. 'l' as it's only being used as an interator ('i' would also do).
> 
> Maybe agree on the former, not really on the latter. :)

Same here.  I kept 'root' to reduce code churn, even though I'd probably have
used 'root_level' if I were writing from scratch.
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a48cd12c01d7..52f36c879086 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3485,16 +3485,16 @@  static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	int leaf = -1;
 	u64 spte;
 
-
 	walk_shadow_page_lockless_begin(vcpu);
 
-	for (shadow_walk_init(&iterator, vcpu, addr);
+	for (shadow_walk_init(&iterator, vcpu, addr),
+	     *root_level = iterator.level;
 	     shadow_walk_okay(&iterator);
 	     __shadow_walk_next(&iterator, spte)) {
 		leaf = iterator.level;
@@ -3504,7 +3504,6 @@  static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 
 		if (!is_shadow_present_pte(spte))
 			break;
-
 	}
 
 	walk_shadow_page_lockless_end(vcpu);
@@ -3517,9 +3516,7 @@  static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	u64 sptes[PT64_ROOT_MAX_LEVEL];
 	struct rsvd_bits_validate *rsvd_check;
-	int root = vcpu->arch.mmu->shadow_root_level;
-	int leaf;
-	int level;
+	int root, leaf, level;
 	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3528,9 +3525,9 @@  static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	}
 
 	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
-		leaf = get_walk(vcpu, addr, sptes);
+		leaf = get_walk(vcpu, addr, sptes, &root);
 
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 50cec7a15ddb..a4f9447f8327 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1148,13 +1148,16 @@  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	gfn_t gfn = addr >> PAGE_SHIFT;
 	int leaf = -1;
 
+	*root_level = vcpu->arch.mmu->shadow_root_level;
+
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf - 1] = iter.old_spte;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 556e065503f6..cbbdbadd1526 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -44,5 +44,7 @@  void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn);
 
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level);
+
 #endif /* __KVM_X86_MMU_TDP_MMU_H */