Message ID | 20230810133432.680392-1-vdonnefort@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Do not size-order align pkvm_alloc_private_va_range() | expand |
On Thu, Aug 10, 2023 at 6:34 AM Vincent Donnefort <vdonnefort@google.com> wrote: > > commit f922c13e778d ("KVM: arm64: Introduce pkvm_alloc_private_va_range()") > added an alignment for the start address of any allocation into the nVHE > protected hypervisor private VA range. > > This alignment (order of the size of the allocation) intends to enable > efficient stack guard verification (if the PAGE_SHIFT bit is zero, the > stack pointer is on the stack guard page, a stack overflow occurred). > > But a such alignment only makes sense for stack allocation and can waste > a lot of VA space. So instead make a stack specific allocation function > (hyp_create_stack()) handling the stack guard page requirements, while > other users (e.g. fixmap) will only get page alignment. > > Signed-off-by: Vincent Donnefort <vdonnefort@google.com> > > diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h > index d5ec972b5c1e..71d17ddb562f 100644 > --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h > +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h > @@ -18,6 +18,7 @@ void *hyp_fixmap_map(phys_addr_t phys); > void hyp_fixmap_unmap(void); > > int hyp_create_idmap(u32 hyp_va_bits); > +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va); > int hyp_map_vectors(void); > int hyp_back_vmemmap(phys_addr_t back); > int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c > index 318298eb3d6b..275e33a8be9a 100644 > --- a/arch/arm64/kvm/hyp/nvhe/mm.c > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c > @@ -44,6 +44,28 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, > return err; > } > > +static int __pkvm_alloc_private_va_range(unsigned long start, size_t size) > +{ > + unsigned long cur; > + int ret = 0; > + > + hyp_assert_lock_held(&pkvm_pgd_lock); > + > + if (!start || start < __io_map_base) > + return -EINVAL; > + > + /* The allocated size is always a multiple of PAGE_SIZE */ > + cur = start + PAGE_ALIGN(size); > + > + /* Are we overflowing on the vmemmap ? */ > + if (cur > __hyp_vmemmap) > + ret = -ENOMEM; > + else > + __io_map_base = cur; > + > + return ret; > +} > + > /** > * pkvm_alloc_private_va_range - Allocates a private VA range. > * @size: The size of the VA range to reserve. > @@ -56,27 +78,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, > */ > int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) > { > - unsigned long base, addr; > - int ret = 0; > + unsigned long addr; > + int ret; > > hyp_spin_lock(&pkvm_pgd_lock); > - > - /* Align the allocation based on the order of its size */ > - addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); > - > - /* The allocated size is always a multiple of PAGE_SIZE */ > - base = addr + PAGE_ALIGN(size); > - > - /* Are we overflowing on the vmemmap ? */ > - if (!addr || base > __hyp_vmemmap) > - ret = -ENOMEM; > - else { > - __io_map_base = base; > - *haddr = addr; > - } > - > + addr = __io_map_base; > + ret = __pkvm_alloc_private_va_range(addr, size); > hyp_spin_unlock(&pkvm_pgd_lock); > > + *haddr = addr; > + > return ret; > } > > @@ -340,6 +351,39 @@ int hyp_create_idmap(u32 hyp_va_bits) > return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); > } > > +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va) > +{ Hi Vincent, Can you rename this instead as pkvm_create_stack(), since it's for the protected case. And I assume we want the same thing for the conventional nVHE mode: remove the alignment from hyp_alloc_private_va_range() into a hyp_create_stack(). Thanks, Kalesh > + unsigned long addr; > + size_t size; > + int ret; > + > + hyp_spin_lock(&pkvm_pgd_lock); > + > + /* Make room for the guard page */ > + size = PAGE_SIZE * 2; > + addr = ALIGN(__io_map_base, size); > + > + ret = __pkvm_alloc_private_va_range(addr, size); > + if (!ret) { > + /* > + * Since the stack grows downwards, map the stack to the page > + * at the higher address and leave the lower guard page > + * unbacked. > + * > + * Any valid stack address now has the PAGE_SHIFT bit as 1 > + * and addresses corresponding to the guard page have the > + * PAGE_SHIFT bit as 0 - this is used for overflow detection. > + */ > + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, > + PAGE_SIZE, stack_pa, PAGE_HYP); > + } > + hyp_spin_unlock(&pkvm_pgd_lock); > + > + *stack_va = addr + size; > + > + return ret; > +} > + > static void *admit_host_page(void *arg) > { > struct kvm_hyp_memcache *host_mc = arg; > diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c > index bb98630dfeaf..782c8d0fb905 100644 > --- a/arch/arm64/kvm/hyp/nvhe/setup.c > +++ b/arch/arm64/kvm/hyp/nvhe/setup.c > @@ -121,33 +121,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, > if (ret) > return ret; > > - /* > - * Allocate a contiguous HYP private VA range for the stack > - * and guard page. The allocation is also aligned based on > - * the order of its size. > - */ > - ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); > + ret = hyp_create_stack(params->stack_pa, &hyp_addr); > if (ret) > return ret; > > - /* > - * Since the stack grows downwards, map the stack to the page > - * at the higher address and leave the lower guard page > - * unbacked. > - * > - * Any valid stack address now has the PAGE_SHIFT bit as 1 > - * and addresses corresponding to the guard page have the > - * PAGE_SHIFT bit as 0 - this is used for overflow detection. > - */ > - hyp_spin_lock(&pkvm_pgd_lock); > - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, > - PAGE_SIZE, params->stack_pa, PAGE_HYP); > - hyp_spin_unlock(&pkvm_pgd_lock); > - if (ret) > - return ret; > - > - /* Update stack_hyp_va to end of the stack's private VA range */ > - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); > + params->stack_hyp_va = hyp_addr; > } > > /* > > base-commit: 52a93d39b17dc7eb98b6aa3edb93943248e03b2f > -- > 2.41.0.640.ga95def55d0-goog >
On Thu, Aug 10, 2023 at 09:42:54AM -0700, Kalesh Singh wrote: > On Thu, Aug 10, 2023 at 6:34 AM Vincent Donnefort <vdonnefort@google.com> wrote: > > > > commit f922c13e778d ("KVM: arm64: Introduce pkvm_alloc_private_va_range()") > > added an alignment for the start address of any allocation into the nVHE > > protected hypervisor private VA range. > > > > This alignment (order of the size of the allocation) intends to enable > > efficient stack guard verification (if the PAGE_SHIFT bit is zero, the > > stack pointer is on the stack guard page, a stack overflow occurred). > > > > But a such alignment only makes sense for stack allocation and can waste > > a lot of VA space. So instead make a stack specific allocation function > > (hyp_create_stack()) handling the stack guard page requirements, while > > other users (e.g. fixmap) will only get page alignment. > > > > Signed-off-by: Vincent Donnefort <vdonnefort@google.com> > > > > diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h > > index d5ec972b5c1e..71d17ddb562f 100644 > > --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h > > +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h > > @@ -18,6 +18,7 @@ void *hyp_fixmap_map(phys_addr_t phys); > > void hyp_fixmap_unmap(void); > > > > int hyp_create_idmap(u32 hyp_va_bits); > > +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va); > > int hyp_map_vectors(void); > > int hyp_back_vmemmap(phys_addr_t back); > > int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); > > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c > > index 318298eb3d6b..275e33a8be9a 100644 > > --- a/arch/arm64/kvm/hyp/nvhe/mm.c > > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c > > @@ -44,6 +44,28 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, > > return err; > > } > > > > +static int __pkvm_alloc_private_va_range(unsigned long start, size_t size) > > +{ > > + unsigned long cur; > > + int ret = 0; > > + > > + hyp_assert_lock_held(&pkvm_pgd_lock); > > + > > + if (!start || start < __io_map_base) > > + return -EINVAL; > > + > > + /* The allocated size is always a multiple of PAGE_SIZE */ > > + cur = start + PAGE_ALIGN(size); > > + > > + /* Are we overflowing on the vmemmap ? */ > > + if (cur > __hyp_vmemmap) > > + ret = -ENOMEM; > > + else > > + __io_map_base = cur; > > + > > + return ret; > > +} > > + > > /** > > * pkvm_alloc_private_va_range - Allocates a private VA range. > > * @size: The size of the VA range to reserve. > > @@ -56,27 +78,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, > > */ > > int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) > > { > > - unsigned long base, addr; > > - int ret = 0; > > + unsigned long addr; > > + int ret; > > > > hyp_spin_lock(&pkvm_pgd_lock); > > - > > - /* Align the allocation based on the order of its size */ > > - addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); > > - > > - /* The allocated size is always a multiple of PAGE_SIZE */ > > - base = addr + PAGE_ALIGN(size); > > - > > - /* Are we overflowing on the vmemmap ? */ > > - if (!addr || base > __hyp_vmemmap) > > - ret = -ENOMEM; > > - else { > > - __io_map_base = base; > > - *haddr = addr; > > - } > > - > > + addr = __io_map_base; > > + ret = __pkvm_alloc_private_va_range(addr, size); > > hyp_spin_unlock(&pkvm_pgd_lock); > > > > + *haddr = addr; > > + > > return ret; > > } > > > > @@ -340,6 +351,39 @@ int hyp_create_idmap(u32 hyp_va_bits) > > return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); > > } > > > > +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va) > > +{ > > Hi Vincent, > > Can you rename this instead as pkvm_create_stack(), since it's for the > protected case. Not sure about the pkvm_create_stack(), I wanted to have something that looks like hyp_create_idmap() that is also pkvm only. And as the latter, the stack creation happens only during the setup, so I thought I'd better keep this format. Thoughts? > > And I assume we want the same thing for the conventional nVHE mode: > remove the alignment from hyp_alloc_private_va_range() into a > hyp_create_stack(). Ah yes, good catch, even though it'll be less of a problem for non-protected nVHE, it'd probably be a good thing of aligning both. Might do that in a separated patch though. > > Thanks, > Kalesh Thanks for having a look! > > > > + unsigned long addr; > > + size_t size; > > + int ret; > > + > > + hyp_spin_lock(&pkvm_pgd_lock); > > + > > + /* Make room for the guard page */ > > + size = PAGE_SIZE * 2; > > + addr = ALIGN(__io_map_base, size); > > + > > + ret = __pkvm_alloc_private_va_range(addr, size); > > + if (!ret) { > > + /* > > + * Since the stack grows downwards, map the stack to the page > > + * at the higher address and leave the lower guard page > > + * unbacked. > > + * > > + * Any valid stack address now has the PAGE_SHIFT bit as 1 > > + * and addresses corresponding to the guard page have the > > + * PAGE_SHIFT bit as 0 - this is used for overflow detection. > > + */ > > + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, > > + PAGE_SIZE, stack_pa, PAGE_HYP); > > + } > > + hyp_spin_unlock(&pkvm_pgd_lock); > > + > > + *stack_va = addr + size; > > + > > + return ret; > > +} > > + > > static void *admit_host_page(void *arg) > > { > > struct kvm_hyp_memcache *host_mc = arg; > > diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c > > index bb98630dfeaf..782c8d0fb905 100644 > > --- a/arch/arm64/kvm/hyp/nvhe/setup.c > > +++ b/arch/arm64/kvm/hyp/nvhe/setup.c > > @@ -121,33 +121,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, > > if (ret) > > return ret; > > > > - /* > > - * Allocate a contiguous HYP private VA range for the stack > > - * and guard page. The allocation is also aligned based on > > - * the order of its size. > > - */ > > - ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); > > + ret = hyp_create_stack(params->stack_pa, &hyp_addr); > > if (ret) > > return ret; > > > > - /* > > - * Since the stack grows downwards, map the stack to the page > > - * at the higher address and leave the lower guard page > > - * unbacked. > > - * > > - * Any valid stack address now has the PAGE_SHIFT bit as 1 > > - * and addresses corresponding to the guard page have the > > - * PAGE_SHIFT bit as 0 - this is used for overflow detection. > > - */ > > - hyp_spin_lock(&pkvm_pgd_lock); > > - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, > > - PAGE_SIZE, params->stack_pa, PAGE_HYP); > > - hyp_spin_unlock(&pkvm_pgd_lock); > > - if (ret) > > - return ret; > > - > > - /* Update stack_hyp_va to end of the stack's private VA range */ > > - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); > > + params->stack_hyp_va = hyp_addr; > > } > > > > /* > > > > base-commit: 52a93d39b17dc7eb98b6aa3edb93943248e03b2f > > -- > > 2.41.0.640.ga95def55d0-goog > >
On Thu, Aug 10, 2023 at 10:09 AM Vincent Donnefort <vdonnefort@google.com> wrote: > > On Thu, Aug 10, 2023 at 09:42:54AM -0700, Kalesh Singh wrote: > > On Thu, Aug 10, 2023 at 6:34 AM Vincent Donnefort <vdonnefort@google.com> wrote: > > > > > > commit f922c13e778d ("KVM: arm64: Introduce pkvm_alloc_private_va_range()") > > > added an alignment for the start address of any allocation into the nVHE > > > protected hypervisor private VA range. > > > > > > This alignment (order of the size of the allocation) intends to enable > > > efficient stack guard verification (if the PAGE_SHIFT bit is zero, the > > > stack pointer is on the stack guard page, a stack overflow occurred). > > > > > > But a such alignment only makes sense for stack allocation and can waste > > > a lot of VA space. So instead make a stack specific allocation function > > > (hyp_create_stack()) handling the stack guard page requirements, while > > > other users (e.g. fixmap) will only get page alignment. > > > > > > Signed-off-by: Vincent Donnefort <vdonnefort@google.com> > > > > > > diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h > > > index d5ec972b5c1e..71d17ddb562f 100644 > > > --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h > > > +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h > > > @@ -18,6 +18,7 @@ void *hyp_fixmap_map(phys_addr_t phys); > > > void hyp_fixmap_unmap(void); > > > > > > int hyp_create_idmap(u32 hyp_va_bits); > > > +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va); > > > int hyp_map_vectors(void); > > > int hyp_back_vmemmap(phys_addr_t back); > > > int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); > > > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c > > > index 318298eb3d6b..275e33a8be9a 100644 > > > --- a/arch/arm64/kvm/hyp/nvhe/mm.c > > > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c > > > @@ -44,6 +44,28 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, > > > return err; > > > } > > > > > > +static int __pkvm_alloc_private_va_range(unsigned long start, size_t size) > > > +{ > > > + unsigned long cur; > > > + int ret = 0; > > > + > > > + hyp_assert_lock_held(&pkvm_pgd_lock); > > > + > > > + if (!start || start < __io_map_base) > > > + return -EINVAL; > > > + > > > + /* The allocated size is always a multiple of PAGE_SIZE */ > > > + cur = start + PAGE_ALIGN(size); > > > + > > > + /* Are we overflowing on the vmemmap ? */ > > > + if (cur > __hyp_vmemmap) > > > + ret = -ENOMEM; > > > + else > > > + __io_map_base = cur; > > > + > > > + return ret; > > > +} > > > + > > > /** > > > * pkvm_alloc_private_va_range - Allocates a private VA range. > > > * @size: The size of the VA range to reserve. > > > @@ -56,27 +78,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, > > > */ > > > int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) > > > { > > > - unsigned long base, addr; > > > - int ret = 0; > > > + unsigned long addr; > > > + int ret; > > > > > > hyp_spin_lock(&pkvm_pgd_lock); > > > - > > > - /* Align the allocation based on the order of its size */ > > > - addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); > > > - > > > - /* The allocated size is always a multiple of PAGE_SIZE */ > > > - base = addr + PAGE_ALIGN(size); > > > - > > > - /* Are we overflowing on the vmemmap ? */ > > > - if (!addr || base > __hyp_vmemmap) > > > - ret = -ENOMEM; > > > - else { > > > - __io_map_base = base; > > > - *haddr = addr; > > > - } > > > - > > > + addr = __io_map_base; > > > + ret = __pkvm_alloc_private_va_range(addr, size); > > > hyp_spin_unlock(&pkvm_pgd_lock); > > > > > > + *haddr = addr; > > > + > > > return ret; > > > } > > > > > > @@ -340,6 +351,39 @@ int hyp_create_idmap(u32 hyp_va_bits) > > > return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); > > > } > > > > > > +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va) > > > +{ > > > > Hi Vincent, > > > > Can you rename this instead as pkvm_create_stack(), since it's for the > > protected case. > > Not sure about the pkvm_create_stack(), I wanted to have something that looks > like hyp_create_idmap() that is also pkvm only. And as the latter, the stack > creation happens only during the setup, so I thought I'd better keep this > format. > > Thoughts? What I meant is (following the existing naming convention): pKVM: pkvm_create_stack() -> pkvm_alloc_private_va_range() nVHE: hyp_create_stack() -> hyp_alloc_private_va_range() > > > > > And I assume we want the same thing for the conventional nVHE mode: > > remove the alignment from hyp_alloc_private_va_range() into a > > hyp_create_stack(). > > Ah yes, good catch, even though it'll be less of a problem for non-protected > nVHE, it'd probably be a good thing of aligning both. Might do that in a > separated patch though. Yeah fine with a follow up patch. Thanks, Kalesh > > > > > Thanks, > > Kalesh > > Thanks for having a look! > > > > > > > > + unsigned long addr; > > > + size_t size; > > > + int ret; > > > + > > > + hyp_spin_lock(&pkvm_pgd_lock); > > > + > > > + /* Make room for the guard page */ > > > + size = PAGE_SIZE * 2; > > > + addr = ALIGN(__io_map_base, size); > > > + > > > + ret = __pkvm_alloc_private_va_range(addr, size); > > > + if (!ret) { > > > + /* > > > + * Since the stack grows downwards, map the stack to the page > > > + * at the higher address and leave the lower guard page > > > + * unbacked. > > > + * > > > + * Any valid stack address now has the PAGE_SHIFT bit as 1 > > > + * and addresses corresponding to the guard page have the > > > + * PAGE_SHIFT bit as 0 - this is used for overflow detection. > > > + */ > > > + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, > > > + PAGE_SIZE, stack_pa, PAGE_HYP); > > > + } > > > + hyp_spin_unlock(&pkvm_pgd_lock); > > > + > > > + *stack_va = addr + size; > > > + > > > + return ret; > > > +} > > > + > > > static void *admit_host_page(void *arg) > > > { > > > struct kvm_hyp_memcache *host_mc = arg; > > > diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c > > > index bb98630dfeaf..782c8d0fb905 100644 > > > --- a/arch/arm64/kvm/hyp/nvhe/setup.c > > > +++ b/arch/arm64/kvm/hyp/nvhe/setup.c > > > @@ -121,33 +121,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, > > > if (ret) > > > return ret; > > > > > > - /* > > > - * Allocate a contiguous HYP private VA range for the stack > > > - * and guard page. The allocation is also aligned based on > > > - * the order of its size. > > > - */ > > > - ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); > > > + ret = hyp_create_stack(params->stack_pa, &hyp_addr); > > > if (ret) > > > return ret; > > > > > > - /* > > > - * Since the stack grows downwards, map the stack to the page > > > - * at the higher address and leave the lower guard page > > > - * unbacked. > > > - * > > > - * Any valid stack address now has the PAGE_SHIFT bit as 1 > > > - * and addresses corresponding to the guard page have the > > > - * PAGE_SHIFT bit as 0 - this is used for overflow detection. > > > - */ > > > - hyp_spin_lock(&pkvm_pgd_lock); > > > - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, > > > - PAGE_SIZE, params->stack_pa, PAGE_HYP); > > > - hyp_spin_unlock(&pkvm_pgd_lock); > > > - if (ret) > > > - return ret; > > > - > > > - /* Update stack_hyp_va to end of the stack's private VA range */ > > > - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); > > > + params->stack_hyp_va = hyp_addr; > > > } > > > > > > /* > > > > > > base-commit: 52a93d39b17dc7eb98b6aa3edb93943248e03b2f > > > -- > > > 2.41.0.640.ga95def55d0-goog > > >
On Thu, 10 Aug 2023 18:09:51 +0100, Vincent Donnefort <vdonnefort@google.com> wrote: > > On Thu, Aug 10, 2023 at 09:42:54AM -0700, Kalesh Singh wrote: > > And I assume we want the same thing for the conventional nVHE mode: > > remove the alignment from hyp_alloc_private_va_range() into a > > hyp_create_stack(). > > Ah yes, good catch, even though it'll be less of a problem for non-protected > nVHE, it'd probably be a good thing of aligning both. Might do that in a > separated patch though. I'd very much like to keep the two in sync. If anything, it is far easier to debug on standard nVHE than with protected mode, so anything that brings the two close to each other gets my vote. Thanks, M.
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index d5ec972b5c1e..71d17ddb562f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -18,6 +18,7 @@ void *hyp_fixmap_map(phys_addr_t phys); void hyp_fixmap_unmap(void); int hyp_create_idmap(u32 hyp_va_bits); +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va); int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 318298eb3d6b..275e33a8be9a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -44,6 +44,28 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, return err; } +static int __pkvm_alloc_private_va_range(unsigned long start, size_t size) +{ + unsigned long cur; + int ret = 0; + + hyp_assert_lock_held(&pkvm_pgd_lock); + + if (!start || start < __io_map_base) + return -EINVAL; + + /* The allocated size is always a multiple of PAGE_SIZE */ + cur = start + PAGE_ALIGN(size); + + /* Are we overflowing on the vmemmap ? */ + if (cur > __hyp_vmemmap) + ret = -ENOMEM; + else + __io_map_base = cur; + + return ret; +} + /** * pkvm_alloc_private_va_range - Allocates a private VA range. * @size: The size of the VA range to reserve. @@ -56,27 +78,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, */ int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) { - unsigned long base, addr; - int ret = 0; + unsigned long addr; + int ret; hyp_spin_lock(&pkvm_pgd_lock); - - /* Align the allocation based on the order of its size */ - addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); - - /* The allocated size is always a multiple of PAGE_SIZE */ - base = addr + PAGE_ALIGN(size); - - /* Are we overflowing on the vmemmap ? */ - if (!addr || base > __hyp_vmemmap) - ret = -ENOMEM; - else { - __io_map_base = base; - *haddr = addr; - } - + addr = __io_map_base; + ret = __pkvm_alloc_private_va_range(addr, size); hyp_spin_unlock(&pkvm_pgd_lock); + *haddr = addr; + return ret; } @@ -340,6 +351,39 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } +int hyp_create_stack(unsigned long stack_pa, unsigned long *stack_va) +{ + unsigned long addr; + size_t size; + int ret; + + hyp_spin_lock(&pkvm_pgd_lock); + + /* Make room for the guard page */ + size = PAGE_SIZE * 2; + addr = ALIGN(__io_map_base, size); + + ret = __pkvm_alloc_private_va_range(addr, size); + if (!ret) { + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, + PAGE_SIZE, stack_pa, PAGE_HYP); + } + hyp_spin_unlock(&pkvm_pgd_lock); + + *stack_va = addr + size; + + return ret; +} + static void *admit_host_page(void *arg) { struct kvm_hyp_memcache *host_mc = arg; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bb98630dfeaf..782c8d0fb905 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -121,33 +121,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - /* - * Allocate a contiguous HYP private VA range for the stack - * and guard page. The allocation is also aligned based on - * the order of its size. - */ - ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + ret = hyp_create_stack(params->stack_pa, &hyp_addr); if (ret) return ret; - /* - * Since the stack grows downwards, map the stack to the page - * at the higher address and leave the lower guard page - * unbacked. - * - * Any valid stack address now has the PAGE_SHIFT bit as 1 - * and addresses corresponding to the guard page have the - * PAGE_SHIFT bit as 0 - this is used for overflow detection. - */ - hyp_spin_lock(&pkvm_pgd_lock); - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, - PAGE_SIZE, params->stack_pa, PAGE_HYP); - hyp_spin_unlock(&pkvm_pgd_lock); - if (ret) - return ret; - - /* Update stack_hyp_va to end of the stack's private VA range */ - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); + params->stack_hyp_va = hyp_addr; } /*
commit f922c13e778d ("KVM: arm64: Introduce pkvm_alloc_private_va_range()") added an alignment for the start address of any allocation into the nVHE protected hypervisor private VA range. This alignment (order of the size of the allocation) intends to enable efficient stack guard verification (if the PAGE_SHIFT bit is zero, the stack pointer is on the stack guard page, a stack overflow occurred). But a such alignment only makes sense for stack allocation and can waste a lot of VA space. So instead make a stack specific allocation function (hyp_create_stack()) handling the stack guard page requirements, while other users (e.g. fixmap) will only get page alignment. Signed-off-by: Vincent Donnefort <vdonnefort@google.com> base-commit: 52a93d39b17dc7eb98b6aa3edb93943248e03b2f