diff mbox series

[v7,3/6] KVM: arm64: Add guard pages for KVM nVHE hypervisor stack

Message ID 20220408200349.1529080-4-kaleshsingh@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Hypervisor stack enhancements | expand

Commit Message

Kalesh Singh April 8, 2022, 8:03 p.m. UTC
Map the stack pages in the flexible private VA range and allocate
guard pages below the stack as unbacked VA space. The stack is aligned
so that any valid stack address has the PAGE_SHIFT bit set to 1, while
any address in the guard page has it set to 0 - this is used for
overflow detection (implemented in a subsequent patch in the series).

Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
---

Changes in v7:
  - Add Fuad's Reviewed-by and Tested-by tags.

Changes in v6:
  - Update call to hyp_alloc_private_va_range() (return val and params)

Changes in v5:
  - Use a single allocation for stack and guard pages to ensure they
    are contiguous, per Marc

Changes in v4:
  - Replace IS_ERR_OR_NULL check with IS_ERR check now that
    hyp_alloc_private_va_range() returns an error for null
    pointer, per Fuad
  - Format comments to < 80 cols, per Fuad

Changes in v3:
  - Handle null ptr in IS_ERR_OR_NULL checks, per Mark


 arch/arm64/include/asm/kvm_asm.h |  1 +
 arch/arm64/include/asm/kvm_mmu.h |  3 +++
 arch/arm64/kvm/arm.c             | 39 +++++++++++++++++++++++++++++---
 arch/arm64/kvm/mmu.c             |  4 ++--
 4 files changed, 42 insertions(+), 5 deletions(-)

Comments

Marc Zyngier April 18, 2022, 10:01 a.m. UTC | #1
On Fri, 08 Apr 2022 21:03:26 +0100,
Kalesh Singh <kaleshsingh@google.com> wrote:
> 
> Map the stack pages in the flexible private VA range and allocate
> guard pages below the stack as unbacked VA space. The stack is aligned
> so that any valid stack address has PAGE_SHIFT bit as 1 - this is used
> for overflow detection (implemented in a subsequent patch in the series).
> 
> Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
> Tested-by: Fuad Tabba <tabba@google.com>
> Reviewed-by: Fuad Tabba <tabba@google.com>
> ---
> 
> Changes in v7:
>   - Add Fuad's Reviewed-by and Tested-by tags.
> 
> Changes in v6:
>   - Update call to hyp_alloc_private_va_range() (return val and params)
> 
> Changes in v5:
>   - Use a single allocation for stack and guard pages to ensure they
>     are contiguous, per Marc
> 
> Changes in v4:
>   - Replace IS_ERR_OR_NULL check with IS_ERR check now that
>     hyp_alloc_private_va_range() returns an error for null
>     pointer, per Fuad
>   - Format comments to < 80 cols, per Fuad
> 
> Changes in v3:
>   - Handle null ptr in IS_ERR_OR_NULL checks, per Mark
> 
> 
>  arch/arm64/include/asm/kvm_asm.h |  1 +
>  arch/arm64/include/asm/kvm_mmu.h |  3 +++
>  arch/arm64/kvm/arm.c             | 39 +++++++++++++++++++++++++++++---
>  arch/arm64/kvm/mmu.c             |  4 ++--
>  4 files changed, 42 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index d5b0386ef765..2e277f2ed671 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -169,6 +169,7 @@ struct kvm_nvhe_init_params {
>  	unsigned long tcr_el2;
>  	unsigned long tpidr_el2;
>  	unsigned long stack_hyp_va;
> +	unsigned long stack_pa;
>  	phys_addr_t pgd_pa;
>  	unsigned long hcr_el2;
>  	unsigned long vttbr;
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index a50cbb5ba402..b805316c4866 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -117,6 +117,9 @@ alternative_cb_end
>  #include <asm/mmu_context.h>
>  #include <asm/kvm_host.h>
>  
> +extern struct kvm_pgtable *hyp_pgtable;
> +extern struct mutex kvm_hyp_pgd_mutex;

I'd rather you don't expose this at all.

> +
>  void kvm_update_va_mask(struct alt_instr *alt,
>  			__le32 *origptr, __le32 *updptr, int nr_inst);
>  void kvm_compute_layout(void);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 523bc934fe2f..5687c0175151 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1483,7 +1483,6 @@ static void cpu_prepare_hyp_mode(int cpu)
>  	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
>  	params->tcr_el2 = tcr;
>  
> -	params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
>  	params->pgd_pa = kvm_mmu_get_httbr();
>  	if (is_protected_kvm_enabled())
>  		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
> @@ -1933,14 +1932,48 @@ static int init_hyp_mode(void)
>  	 * Map the Hyp stack pages
>  	 */
>  	for_each_possible_cpu(cpu) {
> +		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
>  		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
> -		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
> -					  PAGE_HYP);
> +		unsigned long hyp_addr;
>  
> +		/*
> +		 * Allocate a contiguous HYP private VA range for the stack
> +		 * and guard page. The allocation is also aligned based on
> +		 * the order of its size.
> +		 */
> +		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
> +		if (err) {
> +			kvm_err("Cannot allocate hyp stack guard page\n");
> +			goto out_err;
> +		}
> +
> +		/*
> +		 * Since the stack grows downwards, map the stack to the page
> +		 * at the higher address and leave the lower guard page
> +		 * unbacked.
> +		 *
> +		 * Any valid stack address now has the PAGE_SHIFT bit as 1
> +		 * and addresses corresponding to the guard page have the
> +		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
> +		 */
> +		mutex_lock(&kvm_hyp_pgd_mutex);
> +		err = kvm_pgtable_hyp_map(hyp_pgtable, hyp_addr + PAGE_SIZE,
> +					PAGE_SIZE, __pa(stack_page), PAGE_HYP);
> +		mutex_unlock(&kvm_hyp_pgd_mutex);

The mutex (and the HYP page table structure) really should stay
private to the MMU code. Just add a new helper that will take the lock
and use hyp_pgtable.

>  		if (err) {
>  			kvm_err("Cannot map hyp stack\n");
>  			goto out_err;
>  		}
> +
> +		/*
> +		 * Save the stack PA in nvhe_init_params. This will be needed
> +		 * to recreate the stack mapping in protected nVHE mode.
> +		 * __hyp_pa() won't do the right thing there, since the stack
> +		 * has been mapped in the flexible private VA space.
> +		 */
> +		params->stack_pa = __pa(stack_page);
> +
> +		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
>  	}
>  
>  	for_each_possible_cpu(cpu) {
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 3d3efea4e991..a54f00bd06cc 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -22,8 +22,8 @@
>  
>  #include "trace.h"
>  
> -static struct kvm_pgtable *hyp_pgtable;
> -static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
> +struct kvm_pgtable *hyp_pgtable;
> +DEFINE_MUTEX(kvm_hyp_pgd_mutex);
>  
>  static unsigned long hyp_idmap_start;
>  static unsigned long hyp_idmap_end;

Thanks,

	M.
Kalesh Singh April 19, 2022, 2:25 a.m. UTC | #2
On Mon, Apr 18, 2022 at 3:01 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Fri, 08 Apr 2022 21:03:26 +0100,
> Kalesh Singh <kaleshsingh@google.com> wrote:
> >
> > Map the stack pages in the flexible private VA range and allocate
> > guard pages below the stack as unbacked VA space. The stack is aligned
> > so that any valid stack address has PAGE_SHIFT bit as 1 - this is used
> > for overflow detection (implemented in a subsequent patch in the series).
> >
> > Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
> > Tested-by: Fuad Tabba <tabba@google.com>
> > Reviewed-by: Fuad Tabba <tabba@google.com>
> > ---
> >
> > Changes in v7:
> >   - Add Fuad's Reviewed-by and Tested-by tags.
> >
> > Changes in v6:
> >   - Update call to hyp_alloc_private_va_range() (return val and params)
> >
> > Changes in v5:
> >   - Use a single allocation for stack and guard pages to ensure they
> >     are contiguous, per Marc
> >
> > Changes in v4:
> >   - Replace IS_ERR_OR_NULL check with IS_ERR check now that
> >     hyp_alloc_private_va_range() returns an error for null
> >     pointer, per Fuad
> >   - Format comments to < 80 cols, per Fuad
> >
> > Changes in v3:
> >   - Handle null ptr in IS_ERR_OR_NULL checks, per Mark
> >
> >
> >  arch/arm64/include/asm/kvm_asm.h |  1 +
> >  arch/arm64/include/asm/kvm_mmu.h |  3 +++
> >  arch/arm64/kvm/arm.c             | 39 +++++++++++++++++++++++++++++---
> >  arch/arm64/kvm/mmu.c             |  4 ++--
> >  4 files changed, 42 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index d5b0386ef765..2e277f2ed671 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -169,6 +169,7 @@ struct kvm_nvhe_init_params {
> >       unsigned long tcr_el2;
> >       unsigned long tpidr_el2;
> >       unsigned long stack_hyp_va;
> > +     unsigned long stack_pa;
> >       phys_addr_t pgd_pa;
> >       unsigned long hcr_el2;
> >       unsigned long vttbr;
> > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> > index a50cbb5ba402..b805316c4866 100644
> > --- a/arch/arm64/include/asm/kvm_mmu.h
> > +++ b/arch/arm64/include/asm/kvm_mmu.h
> > @@ -117,6 +117,9 @@ alternative_cb_end
> >  #include <asm/mmu_context.h>
> >  #include <asm/kvm_host.h>
> >
> > +extern struct kvm_pgtable *hyp_pgtable;
> > +extern struct mutex kvm_hyp_pgd_mutex;
>
> I'd rather you don't expose this at all.
>
> > +
> >  void kvm_update_va_mask(struct alt_instr *alt,
> >                       __le32 *origptr, __le32 *updptr, int nr_inst);
> >  void kvm_compute_layout(void);
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 523bc934fe2f..5687c0175151 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -1483,7 +1483,6 @@ static void cpu_prepare_hyp_mode(int cpu)
> >       tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
> >       params->tcr_el2 = tcr;
> >
> > -     params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
> >       params->pgd_pa = kvm_mmu_get_httbr();
> >       if (is_protected_kvm_enabled())
> >               params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
> > @@ -1933,14 +1932,48 @@ static int init_hyp_mode(void)
> >        * Map the Hyp stack pages
> >        */
> >       for_each_possible_cpu(cpu) {
> > +             struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
> >               char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
> > -             err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
> > -                                       PAGE_HYP);
> > +             unsigned long hyp_addr;
> >
> > +             /*
> > +              * Allocate a contiguous HYP private VA range for the stack
> > +              * and guard page. The allocation is also aligned based on
> > +              * the order of its size.
> > +              */
> > +             err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
> > +             if (err) {
> > +                     kvm_err("Cannot allocate hyp stack guard page\n");
> > +                     goto out_err;
> > +             }
> > +
> > +             /*
> > +              * Since the stack grows downwards, map the stack to the page
> > +              * at the higher address and leave the lower guard page
> > +              * unbacked.
> > +              *
> > +              * Any valid stack address now has the PAGE_SHIFT bit as 1
> > +              * and addresses corresponding to the guard page have the
> > +              * PAGE_SHIFT bit as 0 - this is used for overflow detection.
> > +              */
> > +             mutex_lock(&kvm_hyp_pgd_mutex);
> > +             err = kvm_pgtable_hyp_map(hyp_pgtable, hyp_addr + PAGE_SIZE,
> > +                                     PAGE_SIZE, __pa(stack_page), PAGE_HYP);
> > +             mutex_unlock(&kvm_hyp_pgd_mutex);
>
> The mutex (and the HYP page table structure) really should stay
> private to the MMU code. Just add a new helper that will take the lock
> and use hyp_pgtable.

Agreed. I think we can expose __create_hyp_mappings() instead here.

Thanks,
Kalesh

>
> >               if (err) {
> >                       kvm_err("Cannot map hyp stack\n");
> >                       goto out_err;
> >               }
> > +
> > +             /*
> > +              * Save the stack PA in nvhe_init_params. This will be needed
> > +              * to recreate the stack mapping in protected nVHE mode.
> > +              * __hyp_pa() won't do the right thing there, since the stack
> > +              * has been mapped in the flexible private VA space.
> > +              */
> > +             params->stack_pa = __pa(stack_page);
> > +
> > +             params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
> >       }
> >
> >       for_each_possible_cpu(cpu) {
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 3d3efea4e991..a54f00bd06cc 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -22,8 +22,8 @@
> >
> >  #include "trace.h"
> >
> > -static struct kvm_pgtable *hyp_pgtable;
> > -static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
> > +struct kvm_pgtable *hyp_pgtable;
> > +DEFINE_MUTEX(kvm_hyp_pgd_mutex);
> >
> >  static unsigned long hyp_idmap_start;
> >  static unsigned long hyp_idmap_end;
>
> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d5b0386ef765..2e277f2ed671 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -169,6 +169,7 @@  struct kvm_nvhe_init_params {
 	unsigned long tcr_el2;
 	unsigned long tpidr_el2;
 	unsigned long stack_hyp_va;
+	unsigned long stack_pa;
 	phys_addr_t pgd_pa;
 	unsigned long hcr_el2;
 	unsigned long vttbr;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a50cbb5ba402..b805316c4866 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -117,6 +117,9 @@  alternative_cb_end
 #include <asm/mmu_context.h>
 #include <asm/kvm_host.h>
 
+extern struct kvm_pgtable *hyp_pgtable;
+extern struct mutex kvm_hyp_pgd_mutex;
+
 void kvm_update_va_mask(struct alt_instr *alt,
 			__le32 *origptr, __le32 *updptr, int nr_inst);
 void kvm_compute_layout(void);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 523bc934fe2f..5687c0175151 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1483,7 +1483,6 @@  static void cpu_prepare_hyp_mode(int cpu)
 	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
 	params->tcr_el2 = tcr;
 
-	params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
 	params->pgd_pa = kvm_mmu_get_httbr();
 	if (is_protected_kvm_enabled())
 		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
@@ -1933,14 +1932,48 @@  static int init_hyp_mode(void)
 	 * Map the Hyp stack pages
 	 */
 	for_each_possible_cpu(cpu) {
+		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
 		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
-					  PAGE_HYP);
+		unsigned long hyp_addr;
 
+		/*
+		 * Allocate a contiguous HYP private VA range for the stack
+		 * and guard page. The allocation is also aligned based on
+		 * the order of its size.
+		 */
+		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+		if (err) {
+			kvm_err("Cannot allocate hyp stack guard page\n");
+			goto out_err;
+		}
+
+		/*
+		 * Since the stack grows downwards, map the stack to the page
+		 * at the higher address and leave the lower guard page
+		 * unbacked.
+		 *
+		 * Any valid stack address now has the PAGE_SHIFT bit as 1
+		 * and addresses corresponding to the guard page have the
+		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+		 */
+		mutex_lock(&kvm_hyp_pgd_mutex);
+		err = kvm_pgtable_hyp_map(hyp_pgtable, hyp_addr + PAGE_SIZE,
+					PAGE_SIZE, __pa(stack_page), PAGE_HYP);
+		mutex_unlock(&kvm_hyp_pgd_mutex);
 		if (err) {
 			kvm_err("Cannot map hyp stack\n");
 			goto out_err;
 		}
+
+		/*
+		 * Save the stack PA in nvhe_init_params. This will be needed
+		 * to recreate the stack mapping in protected nVHE mode.
+		 * __hyp_pa() won't do the right thing there, since the stack
+		 * has been mapped in the flexible private VA space.
+		 */
+		params->stack_pa = __pa(stack_page);
+
+		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
 	}
 
 	for_each_possible_cpu(cpu) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3d3efea4e991..a54f00bd06cc 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -22,8 +22,8 @@ 
 
 #include "trace.h"
 
-static struct kvm_pgtable *hyp_pgtable;
-static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
+struct kvm_pgtable *hyp_pgtable;
+DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
 static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;