Message ID | 20220630135747.26983-15-will@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Introduce pKVM shadow state at EL2 | expand |
> static struct hyp_pool host_s2_pool; > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c > index d3a3b47181de..17d689483ec4 100644 > --- a/arch/arm64/kvm/hyp/nvhe/mm.c > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c > @@ -14,6 +14,7 @@ > #include <nvhe/early_alloc.h> > #include <nvhe/gfp.h> > #include <nvhe/memory.h> > +#include <nvhe/mem_protect.h> > #include <nvhe/mm.h> > #include <nvhe/spinlock.h> > > @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; > unsigned int hyp_memblock_nr; > > static u64 __io_map_base; > +static DEFINE_PER_CPU(void *, hyp_fixmap_base); > > static int __pkvm_create_mappings(unsigned long start, unsigned long size, > unsigned long phys, enum kvm_pgtable_prot prot) > @@ -212,6 +214,76 @@ int hyp_map_vectors(void) > return 0; > } > > +void *hyp_fixmap_map(phys_addr_t phys) > +{ > + void *addr = *this_cpu_ptr(&hyp_fixmap_base); > + int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE, > + phys, PAGE_HYP); > + return ret ? NULL : addr; > +} > + > +int hyp_fixmap_unmap(void) > +{ > + void *addr = *this_cpu_ptr(&hyp_fixmap_base); > + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE); > + > + return (ret != PAGE_SIZE) ? -EINVAL : 0; > +} > + I probably missed something but as the pagetable pages for this mapping are pinned, it seems impossible (currently) for this call to fail. Maybe a WARN_ON would be more appropriate, especially since the callers in the subsequent patches do not seem to check for this function's return value? [...]
On Tue, Jul 19, 2022 at 3:30 PM Vincent Donnefort <vdonnefort@google.com> wrote: > > > static struct hyp_pool host_s2_pool; > > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c > > index d3a3b47181de..17d689483ec4 100644 > > --- a/arch/arm64/kvm/hyp/nvhe/mm.c > > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c > > @@ -14,6 +14,7 @@ > > #include <nvhe/early_alloc.h> > > #include <nvhe/gfp.h> > > #include <nvhe/memory.h> > > +#include <nvhe/mem_protect.h> > > #include <nvhe/mm.h> > > #include <nvhe/spinlock.h> > > > > @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; > > unsigned int hyp_memblock_nr; > > > > static u64 __io_map_base; > > +static DEFINE_PER_CPU(void *, hyp_fixmap_base); > > > > static int __pkvm_create_mappings(unsigned long start, unsigned long size, > > unsigned long phys, enum kvm_pgtable_prot prot) > > @@ -212,6 +214,76 @@ int hyp_map_vectors(void) > > return 0; > > } > > > > +void *hyp_fixmap_map(phys_addr_t phys) > > +{ > > + void *addr = *this_cpu_ptr(&hyp_fixmap_base); > > + int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE, > > + phys, PAGE_HYP); > > + return ret ? NULL : addr; > > +} > > + > > +int hyp_fixmap_unmap(void) > > +{ > > + void *addr = *this_cpu_ptr(&hyp_fixmap_base); > > + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE); > > + > > + return (ret != PAGE_SIZE) ? -EINVAL : 0; > > +} > > + > > I probably missed something but as the pagetable pages for this mapping are > pined, it seems impossible (currently) for this call to fail. Maybe a WARN_ON > would be more appropriate, especially the callers in the subsequent patches do > not seem to check for this function return value? Right, I think that wouldn't hurt. And while looking at this, I actually think we could get rid of these calls to the map/unmap functions entirely by keeping the pointers to the reserved PTEs directly in addition to the VA to which they correspond in the percpu table. 
That way we could manipulate the PTEs directly and avoid unnecessary pgtable walks. Bits [63:1] can probably remain untouched, and {un}mapping is then only a matter of flipping bit 0 in the PTE (and TLBI on the unmap path). I'll have a go at it. Cheers, Quentin
On Tue, Jul 19, 2022 at 4:09 PM Quentin Perret <qperret@google.com> wrote: > > On Tue, Jul 19, 2022 at 3:30 PM Vincent Donnefort <vdonnefort@google.com> wrote: > > > > > static struct hyp_pool host_s2_pool; > > > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c > > > index d3a3b47181de..17d689483ec4 100644 > > > --- a/arch/arm64/kvm/hyp/nvhe/mm.c > > > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c > > > @@ -14,6 +14,7 @@ > > > #include <nvhe/early_alloc.h> > > > #include <nvhe/gfp.h> > > > #include <nvhe/memory.h> > > > +#include <nvhe/mem_protect.h> > > > #include <nvhe/mm.h> > > > #include <nvhe/spinlock.h> > > > > > > @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; > > > unsigned int hyp_memblock_nr; > > > > > > static u64 __io_map_base; > > > +static DEFINE_PER_CPU(void *, hyp_fixmap_base); > > > > > > static int __pkvm_create_mappings(unsigned long start, unsigned long size, > > > unsigned long phys, enum kvm_pgtable_prot prot) > > > @@ -212,6 +214,76 @@ int hyp_map_vectors(void) > > > return 0; > > > } > > > > > > +void *hyp_fixmap_map(phys_addr_t phys) > > > +{ > > > + void *addr = *this_cpu_ptr(&hyp_fixmap_base); > > > + int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE, > > > + phys, PAGE_HYP); > > > + return ret ? NULL : addr; > > > +} > > > + > > > +int hyp_fixmap_unmap(void) > > > +{ > > > + void *addr = *this_cpu_ptr(&hyp_fixmap_base); > > > + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE); > > > + > > > + return (ret != PAGE_SIZE) ? -EINVAL : 0; > > > +} > > > + > > > > I probably missed something but as the pagetable pages for this mapping are > > pined, it seems impossible (currently) for this call to fail. Maybe a WARN_ON > > would be more appropriate, especially the callers in the subsequent patches do > > not seem to check for this function return value? > > Right, I think that wouldn't hurt. 
And while looking at this, I > actually think we could get rid of these calls to the map/unmap > functions entirely by keeping the pointers to the reserved PTEs > directly in addition to the VA to which they correspond in the percpu > table. That way we could manipulate the PTEs directly and avoid > unnecessary pgtable walks. Bits [63:1] can probably remain untouched, Well, the address bits need to change too obviously, but rest can stay. > and {un}mapping is then only a matter of flipping bit 0 in the PTE > (and TLBI on the unmap path). I'll have a go at it. > > Cheers, > Quentin
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 3a0817b5c739..d11d9d68a680 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -59,6 +59,8 @@ enum pkvm_component_id { PKVM_ID_HYP, }; +extern unsigned long hyp_nr_cpus; + int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); int __pkvm_host_unshare_hyp(u64 pfn); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index b2ee6d5df55b..882c5711eda5 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -13,6 +13,10 @@ extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; +int hyp_create_pcpu_fixmap(void); +void *hyp_fixmap_map(phys_addr_t phys); +int hyp_fixmap_unmap(void); + int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t back); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 9baf731736be..a0af23de2640 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -21,7 +21,6 @@ #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) -extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; static struct hyp_pool host_s2_pool; diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index d3a3b47181de..17d689483ec4 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -14,6 +14,7 @@ #include <nvhe/early_alloc.h> #include <nvhe/gfp.h> #include <nvhe/memory.h> +#include <nvhe/mem_protect.h> #include <nvhe/mm.h> #include <nvhe/spinlock.h> @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; unsigned int hyp_memblock_nr; static u64 __io_map_base; +static DEFINE_PER_CPU(void *, hyp_fixmap_base); static int __pkvm_create_mappings(unsigned long start, unsigned long size, 
unsigned long phys, enum kvm_pgtable_prot prot) @@ -212,6 +214,76 @@ int hyp_map_vectors(void) return 0; } +void *hyp_fixmap_map(phys_addr_t phys) +{ + void *addr = *this_cpu_ptr(&hyp_fixmap_base); + int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE, + phys, PAGE_HYP); + return ret ? NULL : addr; +} + +int hyp_fixmap_unmap(void) +{ + void *addr = *this_cpu_ptr(&hyp_fixmap_base); + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE); + + return (ret != PAGE_SIZE) ? -EINVAL : 0; +} + +static int __pin_pgtable_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + hyp_page_ref_inc(hyp_virt_to_page(ptep)); + + return 0; +} + +static int hyp_pin_pgtable_pages(u64 addr) +{ + struct kvm_pgtable_walker walker = { + .cb = __pin_pgtable_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); +} + +int hyp_create_pcpu_fixmap(void) +{ + unsigned long addr, i; + int ret; + + for (i = 0; i < hyp_nr_cpus; i++) { + ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr); + if (ret) + return ret; + + /* + * Create a dummy mapping, to get the intermediate page-table + * pages allocated, then take a reference on the last level + * page to keep it around at all times. 
+ */ + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE, + __hyp_pa(__hyp_bss_start), PAGE_HYP); + if (ret) + return ret; + + ret = hyp_pin_pgtable_pages(addr); + if (ret) + return ret; + + ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, PAGE_SIZE); + if (ret != PAGE_SIZE) + return -EINVAL; + + *per_cpu_ptr(&hyp_fixmap_base, i) = (void *)addr; + } + + return 0; +} + int hyp_create_idmap(u32 hyp_va_bits) { unsigned long start, end; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index fb0eff15a89f..3f689ffb2693 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -321,6 +321,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = hyp_create_pcpu_fixmap(); + if (ret) + goto out; + hyp_shadow_table_init(shadow_table_base); out: /*