Message ID | 20200212160918.18470-5-liuwe@microsoft.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Xen on Hyper-V: Implement L0 assisted TLB flush | expand |
On Wed, Feb 12, 2020 at 04:09:18PM +0000, Wei Liu wrote: > Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage > of several hypercalls: > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX > > Pick the most efficient hypercalls available. > > Signed-off-by: Wei Liu <liuwe@microsoft.com> > --- > xen/arch/x86/guest/hyperv/Makefile | 1 + > xen/arch/x86/guest/hyperv/private.h | 9 ++ > xen/arch/x86/guest/hyperv/tlb.c | 172 +++++++++++++++++++++++++++- > xen/arch/x86/guest/hyperv/util.c | 72 ++++++++++++ > 4 files changed, 253 insertions(+), 1 deletion(-) > create mode 100644 xen/arch/x86/guest/hyperv/util.c > > diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile > index 18902c33e9..0e39410968 100644 > --- a/xen/arch/x86/guest/hyperv/Makefile > +++ b/xen/arch/x86/guest/hyperv/Makefile > @@ -1,2 +1,3 @@ > obj-y += hyperv.o > obj-y += tlb.o > +obj-y += util.o > diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h > index 78e52f74ce..311f060495 100644 > --- a/xen/arch/x86/guest/hyperv/private.h > +++ b/xen/arch/x86/guest/hyperv/private.h > @@ -24,12 +24,21 @@ > > #include <xen/cpumask.h> > #include <xen/percpu.h> > +#include <xen/types.h> > > DECLARE_PER_CPU(void *, hv_input_page); > DECLARE_PER_CPU(void *, hv_vp_assist); > DECLARE_PER_CPU(uint32_t, hv_vp_index); > > +static inline uint32_t hv_vp_index(int cpu) unsigned int for cpu. 
> +{ > + return per_cpu(hv_vp_index, cpu); > +} > + > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > unsigned int flags); > > +/* Returns number of banks, -ev if error */ > +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask); > + > #endif /* __XEN_HYPERV_PRIVIATE_H__ */ > diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c > index 48f527229e..99b789d9e9 100644 > --- a/xen/arch/x86/guest/hyperv/tlb.c > +++ b/xen/arch/x86/guest/hyperv/tlb.c > @@ -19,15 +19,185 @@ > * Copyright (c) 2020 Microsoft. > */ > > +#include <xen/cpu.h> > #include <xen/cpumask.h> > #include <xen/errno.h> > > +#include <asm/guest/hyperv.h> > +#include <asm/guest/hyperv-hcall.h> > +#include <asm/guest/hyperv-tlfs.h> > + > #include "private.h" > > +/* > + * It is possible to encode up to 4096 pages using the lower 12 bits > + * in an element of gva_list > + */ > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) > +#define ORDER_TO_BYTES(order) ((1ul << (order)) * PAGE_SIZE) There are already some conversion functions in xen/mm.h (get_order_from_{bytes/pages}), maybe you could add a get_bytes_from_order helper there? > + > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, > + unsigned int order) > +{ > + unsigned long start = (unsigned long)va; > + unsigned long end = start + ORDER_TO_BYTES(order) - 1; > + unsigned int n = 0; > + > + do { > + unsigned long remain = end > start ? end - start : 0; I don't think you can get here with end == start? As that's the condition of the loop, and order 0 is going to set end = start + 4096 - 1. 
> + > + gva_list[n] = start & PAGE_MASK; > + > + /* > + * Use lower 12 bits to encode the number of additional pages > + * to flush > + */ > + if ( remain >= HV_TLB_FLUSH_UNIT ) > + { > + gva_list[n] |= ~PAGE_MASK; > + start += HV_TLB_FLUSH_UNIT; > + } > + else if ( remain ) > + { > + gva_list[n] |= (remain - 1) >> PAGE_SHIFT; > + start = end; > + } > + > + n++; > + } while ( start < end ); > + > + return n; > +} > + > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, > + unsigned int flags) > +{ > + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); > + int nr_banks; > + unsigned int max_gvas; > + unsigned int order = flags & FLUSH_ORDER_MASK; > + uint64_t ret; > + > + ASSERT(flush); > + ASSERT(!local_irq_is_enabled()); Can you turn this into an if condition with ASSERT_UNREACHABLE and return ~0ULL? (as I think that signals an error). > + > + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) > + return ~0ULL; > + > + flush->address_space = 0; > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > + > + flush->hv_vp_set.valid_bank_mask = 0; > + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; > + > + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask); > + if ( nr_banks < 0 ) > + return ~0ULL; > + > + max_gvas = > + (PAGE_SIZE - sizeof(*flush) - nr_banks * > + sizeof(flush->hv_vp_set.bank_contents[0])) / > + sizeof(uint64_t); /* gva is represented as uint64_t */ > + > + /* > + * Flush the entire address space if va is NULL or if there is not > + * enough space for gva_list. 
> + */ > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, > + nr_banks, virt_to_maddr(flush), 0); > + else > + { > + uint64_t *gva_list = (uint64_t *)flush + sizeof(*flush) + nr_banks; Don't you need nr_banks * sizeof(flush->hv_vp_set.bank_contents) in order to calculate the position of the gva_list? > + unsigned int gvas = fill_gva_list(gva_list, va, order); > + > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, > + gvas, nr_banks, virt_to_maddr(flush), 0); > + } > + > + return ret; > +} > + > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > unsigned int flags) > { > - return -EOPNOTSUPP; > + unsigned long irq_flags; > + struct hv_tlb_flush *flush = this_cpu(hv_input_page); > + uint64_t ret; > + unsigned int order = flags & FLUSH_ORDER_MASK; > + unsigned int max_gvas; > + > + ASSERT(flush); > + ASSERT(!cpumask_empty(mask)); > + > + local_irq_save(irq_flags); > + > + flush->address_space = 0; > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > + flush->processor_mask = 0; > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > + > + if ( cpumask_equal(mask, &cpu_online_map) ) > + flush->flags |= HV_FLUSH_ALL_PROCESSORS; > + else > + { > + int cpu; unsigned int. > + > + /* > + * Normally VP indices are in ascending order and match Xen's > + * idea of CPU ids. Check the last index to see if VP index is > + * >= 64. If so, we can skip setting up parameters for > + * non-applicable hypercalls without looking further. 
> + */ > + if ( hv_vp_index(cpumask_last(mask)) >= 64 ) > + goto do_ex_hypercall; > + > + for_each_cpu ( cpu, mask ) > + { > + uint32_t vpid = hv_vp_index(cpu); > + > + if ( vpid > ms_hyperv.max_vp_index ) > + { > + local_irq_restore(irq_flags); > + return -ENXIO; > + } > + > + if ( vpid >= 64 ) > + goto do_ex_hypercall; > + > + __set_bit(vpid, &flush->processor_mask); > + } > + } > + > + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); > + > + /* > + * Flush the entire address space if va is NULL or if there is not > + * enough space for gva_list. > + */ > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > + ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, > + virt_to_maddr(flush), 0); > + else > + { > + unsigned int gvas = fill_gva_list(flush->gva_list, va, order); > + > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0, > + virt_to_maddr(flush), 0); > + } > + > + goto done; > + > + do_ex_hypercall: > + ret = flush_tlb_ex(mask, va, flags); > + > + done: > + local_irq_restore(irq_flags); > + > + return ret & HV_HYPERCALL_RESULT_MASK; Will this return an error code that uses the same space as Xen's errno values? > } > > /* > diff --git a/xen/arch/x86/guest/hyperv/util.c b/xen/arch/x86/guest/hyperv/util.c > new file mode 100644 > index 0000000000..9d0b5f4a46 > --- /dev/null > +++ b/xen/arch/x86/guest/hyperv/util.c > @@ -0,0 +1,72 @@ > +/****************************************************************************** > + * arch/x86/guest/hyperv/util.c > + * > + * Hyper-V utility functions > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. 
> + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; If not, see <http://www.gnu.org/licenses/>. > + * > + * Copyright (c) 2020 Microsoft. > + */ > + > +#include <xen/cpu.h> > +#include <xen/cpumask.h> > +#include <xen/errno.h> > + > +#include <asm/guest/hyperv.h> > +#include <asm/guest/hyperv-tlfs.h> > + > +#include "private.h" > + > +int cpumask_to_vpset(struct hv_vpset *vpset, > + const cpumask_t *mask) > +{ > + int nr = 1, cpu, vcpu_bank, vcpu_offset; > + int max_banks = ms_hyperv.max_vp_index / 64; I think nr wants to be int (to match the function return type), but the rest should be unsigned ints, especially because they are used as array indexes. > + > + /* Up to 64 banks can be represented by valid_bank_mask */ > + if ( max_banks >= 64 ) > + return -1; E2BIG or some such? > + > + /* Clear all banks to avoid flushing unwanted CPUs */ > + for ( vcpu_bank = 0; vcpu_bank <= max_banks; vcpu_bank++ ) > + vpset->bank_contents[vcpu_bank] = 0; > + > + vpset->valid_bank_mask = 0; > + > + for_each_cpu ( cpu, mask ) > + { > + int vcpu = hv_vp_index(cpu); unsigned int or uint32_t (which is the type that hv_vp_index returns). Thanks, Roger.
On 12.02.2020 18:43, Roger Pau Monné wrote: > On Wed, Feb 12, 2020 at 04:09:18PM +0000, Wei Liu wrote: >> Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage >> of several hypercalls: >> >> * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST >> * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX >> * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE >> * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX >> >> Pick the most efficient hypercalls available. >> >> Signed-off-by: Wei Liu <liuwe@microsoft.com> >> --- >> xen/arch/x86/guest/hyperv/Makefile | 1 + >> xen/arch/x86/guest/hyperv/private.h | 9 ++ >> xen/arch/x86/guest/hyperv/tlb.c | 172 +++++++++++++++++++++++++++- >> xen/arch/x86/guest/hyperv/util.c | 72 ++++++++++++ >> 4 files changed, 253 insertions(+), 1 deletion(-) >> create mode 100644 xen/arch/x86/guest/hyperv/util.c >> >> diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile >> index 18902c33e9..0e39410968 100644 >> --- a/xen/arch/x86/guest/hyperv/Makefile >> +++ b/xen/arch/x86/guest/hyperv/Makefile >> @@ -1,2 +1,3 @@ >> obj-y += hyperv.o >> obj-y += tlb.o >> +obj-y += util.o >> diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h >> index 78e52f74ce..311f060495 100644 >> --- a/xen/arch/x86/guest/hyperv/private.h >> +++ b/xen/arch/x86/guest/hyperv/private.h >> @@ -24,12 +24,21 @@ >> >> #include <xen/cpumask.h> >> #include <xen/percpu.h> >> +#include <xen/types.h> >> >> DECLARE_PER_CPU(void *, hv_input_page); >> DECLARE_PER_CPU(void *, hv_vp_assist); >> DECLARE_PER_CPU(uint32_t, hv_vp_index); >> >> +static inline uint32_t hv_vp_index(int cpu) > > unsigned int for cpu. And also for the return type, as per my comment on patch 1. >> --- a/xen/arch/x86/guest/hyperv/tlb.c >> +++ b/xen/arch/x86/guest/hyperv/tlb.c >> @@ -19,15 +19,185 @@ >> * Copyright (c) 2020 Microsoft. 
>> */ >> >> +#include <xen/cpu.h> >> #include <xen/cpumask.h> >> #include <xen/errno.h> >> >> +#include <asm/guest/hyperv.h> >> +#include <asm/guest/hyperv-hcall.h> >> +#include <asm/guest/hyperv-tlfs.h> >> + >> #include "private.h" >> >> +/* >> + * It is possible to encode up to 4096 pages using the lower 12 bits >> + * in an element of gva_list >> + */ >> +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) >> +#define ORDER_TO_BYTES(order) ((1ul << (order)) * PAGE_SIZE) > > There are already some conversion functions in xen/mm.h > (get_order_from_{bytes/pages}), maybe you could add a > get_bytes_from_order helper there? I don't think a macro (or helper function) is worthwhile here - we don't have any in the various other places that do the same. The above should be used inline, preferably in the simpler form of PAGE_SIZE << order. Jan
On Wed, Feb 12, 2020 at 06:43:47PM +0100, Roger Pau Monné wrote: > On Wed, Feb 12, 2020 at 04:09:18PM +0000, Wei Liu wrote: > > Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage > > of several hypercalls: > > > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX > > > > Pick the most efficient hypercalls available. > > > > Signed-off-by: Wei Liu <liuwe@microsoft.com> > > --- > > xen/arch/x86/guest/hyperv/Makefile | 1 + > > xen/arch/x86/guest/hyperv/private.h | 9 ++ > > xen/arch/x86/guest/hyperv/tlb.c | 172 +++++++++++++++++++++++++++- > > xen/arch/x86/guest/hyperv/util.c | 72 ++++++++++++ > > 4 files changed, 253 insertions(+), 1 deletion(-) > > create mode 100644 xen/arch/x86/guest/hyperv/util.c > > > > diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile > > index 18902c33e9..0e39410968 100644 > > --- a/xen/arch/x86/guest/hyperv/Makefile > > +++ b/xen/arch/x86/guest/hyperv/Makefile > > @@ -1,2 +1,3 @@ > > obj-y += hyperv.o > > obj-y += tlb.o > > +obj-y += util.o > > diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h > > index 78e52f74ce..311f060495 100644 > > --- a/xen/arch/x86/guest/hyperv/private.h > > +++ b/xen/arch/x86/guest/hyperv/private.h > > @@ -24,12 +24,21 @@ > > > > #include <xen/cpumask.h> > > #include <xen/percpu.h> > > +#include <xen/types.h> > > > > DECLARE_PER_CPU(void *, hv_input_page); > > DECLARE_PER_CPU(void *, hv_vp_assist); > > DECLARE_PER_CPU(uint32_t, hv_vp_index); > > > > +static inline uint32_t hv_vp_index(int cpu) > > unsigned int for cpu. 
> > > +{ > > + return per_cpu(hv_vp_index, cpu); > > +} > > + > > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > > unsigned int flags); > > > > +/* Returns number of banks, -ev if error */ > > +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask); > > + > > #endif /* __XEN_HYPERV_PRIVIATE_H__ */ > > diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c > > index 48f527229e..99b789d9e9 100644 > > --- a/xen/arch/x86/guest/hyperv/tlb.c > > +++ b/xen/arch/x86/guest/hyperv/tlb.c > > @@ -19,15 +19,185 @@ > > * Copyright (c) 2020 Microsoft. > > */ > > > > +#include <xen/cpu.h> > > #include <xen/cpumask.h> > > #include <xen/errno.h> > > > > +#include <asm/guest/hyperv.h> > > +#include <asm/guest/hyperv-hcall.h> > > +#include <asm/guest/hyperv-tlfs.h> > > + > > #include "private.h" > > > > +/* > > + * It is possible to encode up to 4096 pages using the lower 12 bits > > + * in an element of gva_list > > + */ > > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) > > +#define ORDER_TO_BYTES(order) ((1ul << (order)) * PAGE_SIZE) > > There are already some conversion functions in xen/mm.h > (get_order_from_{bytes/pages}), maybe you could add a > get_bytes_from_order helper there? > > > + > > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, > > + unsigned int order) > > +{ > > + unsigned long start = (unsigned long)va; > > + unsigned long end = start + ORDER_TO_BYTES(order) - 1; > > + unsigned int n = 0; > > + > > + do { > > + unsigned long remain = end > start ? end - start : 0; > > I don't think you can get here with end == start? > > As that's the condition of the loop, and order 0 is going to set > end = start + 4096 - 1. Correct. This can be simplified as remain = end - start . 
> > > + > > + gva_list[n] = start & PAGE_MASK; > > + > > + /* > > + * Use lower 12 bits to encode the number of additional pages > > + * to flush > > + */ > > + if ( remain >= HV_TLB_FLUSH_UNIT ) > > + { > > + gva_list[n] |= ~PAGE_MASK; > > + start += HV_TLB_FLUSH_UNIT; > > + } > > + else if ( remain ) > > + { > > + gva_list[n] |= (remain - 1) >> PAGE_SHIFT; > > + start = end; > > + } > > + > > + n++; > > + } while ( start < end ); > > + > > + return n; > > +} > > + > > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, > > + unsigned int flags) > > +{ > > + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); > > + int nr_banks; > > + unsigned int max_gvas; > > + unsigned int order = flags & FLUSH_ORDER_MASK; > > + uint64_t ret; > > + > > + ASSERT(flush); > > + ASSERT(!local_irq_is_enabled()); > > Can you turn this into an if condition with ASSERT_UNREACHABLE and > return ~0ULL? (as I think that signals an error). > There is no need for that. This function will always be internal to Hyper-V in the foreseeable future. If it is ever called with IRQ enabled something is wrong with the code. > > + > > + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) > > + return ~0ULL; > > + > > + flush->address_space = 0; > > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > + > > + flush->hv_vp_set.valid_bank_mask = 0; > > + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; > > + > > + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask); > > + if ( nr_banks < 0 ) > > + return ~0ULL; > > + > > + max_gvas = > > + (PAGE_SIZE - sizeof(*flush) - nr_banks * > > + sizeof(flush->hv_vp_set.bank_contents[0])) / > > + sizeof(uint64_t); /* gva is represented as uint64_t */ > > + > > + /* > > + * Flush the entire address space if va is NULL or if there is not > > + * enough space for gva_list. 
> > + */ > > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, > > + nr_banks, virt_to_maddr(flush), 0); > > + else > > + { > > + uint64_t *gva_list = (uint64_t *)flush + sizeof(*flush) + nr_banks; > > Don't you need nr_banks * sizeof(flush->hv_vp_set.bank_contents) in > order to calculate the position of the gva_list? > The pointer arithmetic is done on uint64_t pointers so it already takes into account sizeof(bank_contents[0]). > > + unsigned int gvas = fill_gva_list(gva_list, va, order); > > + > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, > > + gvas, nr_banks, virt_to_maddr(flush), 0); > > + } > > + > > + return ret; > > +} > > + > > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > > unsigned int flags) > > { > > - return -EOPNOTSUPP; > > + unsigned long irq_flags; > > + struct hv_tlb_flush *flush = this_cpu(hv_input_page); > > + uint64_t ret; > > + unsigned int order = flags & FLUSH_ORDER_MASK; > > + unsigned int max_gvas; > > + > > + ASSERT(flush); > > + ASSERT(!cpumask_empty(mask)); > > + > > + local_irq_save(irq_flags); > > + > > + flush->address_space = 0; > > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > + flush->processor_mask = 0; > > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > + > > + if ( cpumask_equal(mask, &cpu_online_map) ) > > + flush->flags |= HV_FLUSH_ALL_PROCESSORS; > > + else > > + { > > + int cpu; > > unsigned int. > I picked int here and above because all the cpumask functions return int. I don't mind changing it to unsigned int -- it makes no practical difference. > > + > > + /* > > + * Normally VP indices are in ascending order and match Xen's > > + * idea of CPU ids. Check the last index to see if VP index is > > + * >= 64. If so, we can skip setting up parameters for > > + * non-applicable hypercalls without looking further. 
> > + */ > > + if ( hv_vp_index(cpumask_last(mask)) >= 64 ) > > + goto do_ex_hypercall; > > + > > + for_each_cpu ( cpu, mask ) > > + { > > + uint32_t vpid = hv_vp_index(cpu); > > + > > + if ( vpid > ms_hyperv.max_vp_index ) > > + { > > + local_irq_restore(irq_flags); > > + return -ENXIO; > > + } > > + > > + if ( vpid >= 64 ) > > + goto do_ex_hypercall; > > + > > + __set_bit(vpid, &flush->processor_mask); > > + } > > + } > > + > > + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); > > + > > + /* > > + * Flush the entire address space if va is NULL or if there is not > > + * enough space for gva_list. > > + */ > > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > > + ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, > > + virt_to_maddr(flush), 0); > > + else > > + { > > + unsigned int gvas = fill_gva_list(flush->gva_list, va, order); > > + > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0, > > + virt_to_maddr(flush), 0); > > + } > > + > > + goto done; > > + > > + do_ex_hypercall: > > + ret = flush_tlb_ex(mask, va, flags); > > + > > + done: > > + local_irq_restore(irq_flags); > > + > > + return ret & HV_HYPERCALL_RESULT_MASK; > > Will this return an error code that uses the same space as Xen's errno > values? > No, it won't. It returns Hyper-V's status code (0 still means success). I didn't think that was a big deal because non-zero values meant errors. And the upper layer didn't care about the exact error values (yet). 
> > } > > > > /* > > diff --git a/xen/arch/x86/guest/hyperv/util.c b/xen/arch/x86/guest/hyperv/util.c > > new file mode 100644 > > index 0000000000..9d0b5f4a46 > > --- /dev/null > > +++ b/xen/arch/x86/guest/hyperv/util.c > > @@ -0,0 +1,72 @@ > > +/****************************************************************************** > > + * arch/x86/guest/hyperv/util.c > > + * > > + * Hyper-V utility functions > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License as published by > > + * the Free Software Foundation; either version 2 of the License, or > > + * (at your option) any later version. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program; If not, see <http://www.gnu.org/licenses/>. > > + * > > + * Copyright (c) 2020 Microsoft. > > + */ > > + > > +#include <xen/cpu.h> > > +#include <xen/cpumask.h> > > +#include <xen/errno.h> > > + > > +#include <asm/guest/hyperv.h> > > +#include <asm/guest/hyperv-tlfs.h> > > + > > +#include "private.h" > > + > > +int cpumask_to_vpset(struct hv_vpset *vpset, > > + const cpumask_t *mask) > > +{ > > + int nr = 1, cpu, vcpu_bank, vcpu_offset; > > + int max_banks = ms_hyperv.max_vp_index / 64; > > I think nr whats to be int (to match the function return type), but > the rest should be unsigned ints, specially because they are used as > array indexes. > OK. > > + > > + /* Up to 64 banks can be represented by valid_bank_mask */ > > + if ( max_banks >= 64 ) > > + return -1; > > E2BIG or some such? > Right. That's better than -1. 
> > + > > + /* Clear all banks to avoid flushing unwanted CPUs */ > > + for ( vcpu_bank = 0; vcpu_bank <= max_banks; vcpu_bank++ ) > > + vpset->bank_contents[vcpu_bank] = 0; > > + > > + vpset->valid_bank_mask = 0; > > + > > + for_each_cpu ( cpu, mask ) > > + { > > + int vcpu = hv_vp_index(cpu); > > unsigned int or uint32_t (which is the tyupe that hv_vp_index > returns). > > Thanks, Roger.
On Thu, Feb 13, 2020 at 10:49:39AM +0100, Jan Beulich wrote: > >> diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile > >> index 18902c33e9..0e39410968 100644 > >> --- a/xen/arch/x86/guest/hyperv/Makefile > >> +++ b/xen/arch/x86/guest/hyperv/Makefile > >> @@ -1,2 +1,3 @@ > >> obj-y += hyperv.o > >> obj-y += tlb.o > >> +obj-y += util.o > >> diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h > >> index 78e52f74ce..311f060495 100644 > >> --- a/xen/arch/x86/guest/hyperv/private.h > >> +++ b/xen/arch/x86/guest/hyperv/private.h > >> @@ -24,12 +24,21 @@ > >> > >> #include <xen/cpumask.h> > >> #include <xen/percpu.h> > >> +#include <xen/types.h> > >> > >> DECLARE_PER_CPU(void *, hv_input_page); > >> DECLARE_PER_CPU(void *, hv_vp_assist); > >> DECLARE_PER_CPU(uint32_t, hv_vp_index); > >> > >> +static inline uint32_t hv_vp_index(int cpu) > > > > unsigned int for cpu. > > And also for the return type, as per my comment on patch 1. Ack.
On Thu, Feb 13, 2020 at 12:20:33PM +0000, Wei Liu wrote: > On Wed, Feb 12, 2020 at 06:43:47PM +0100, Roger Pau Monné wrote: > > On Wed, Feb 12, 2020 at 04:09:18PM +0000, Wei Liu wrote: > > > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, > > > + unsigned int flags) > > > +{ > > > + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); > > > + int nr_banks; > > > + unsigned int max_gvas; > > > + unsigned int order = flags & FLUSH_ORDER_MASK; > > > + uint64_t ret; > > > + > > > + ASSERT(flush); > > > + ASSERT(!local_irq_is_enabled()); > > > > Can you turn this into an if condition with ASSERT_UNREACHABLE and > > return ~0ULL? (as I think that signals an error). > > > > There is no need for that. This function will always be internal to > Hyper-V in the foreseeable future. If it is ever called with IRQ enabled > something is wrong with the code. But if it ever manages to be called violating one of those conditions things will go badly I assume? It would be better to stay on the safe side and simply return an error when the conditions are not met, and assert in the debug build. > > > > + > > > + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) > > > + return ~0ULL; > > > + > > > + flush->address_space = 0; > > > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > > > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > > + > > > + flush->hv_vp_set.valid_bank_mask = 0; > > > + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; > > > + > > > + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask); > > > + if ( nr_banks < 0 ) > > > + return ~0ULL; > > > + > > > + max_gvas = > > > + (PAGE_SIZE - sizeof(*flush) - nr_banks * > > > + sizeof(flush->hv_vp_set.bank_contents[0])) / > > > + sizeof(uint64_t); /* gva is represented as uint64_t */ > > > + > > > + /* > > > + * Flush the entire address space if va is NULL or if there is not > > > + * enough space for gva_list. 
> > > + */ > > > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, > > > + nr_banks, virt_to_maddr(flush), 0); > > > + else > > > + { > > > + uint64_t *gva_list = (uint64_t *)flush + sizeof(*flush) + nr_banks; > > > > Don't you need nr_banks * sizeof(flush->hv_vp_set.bank_contents) in > > order to calculate the position of the gva_list? > > > > The pointer arithmetic is done on uint64_t pointers so it already takes > into account sizeof(bank_contents[0]). Oh, then the sizeof(*flush) should be divided by sizeof(uint64_t)? > > > + unsigned int gvas = fill_gva_list(gva_list, va, order); > > > + > > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, > > > + gvas, nr_banks, virt_to_maddr(flush), 0); > > > + } > > > + > > > + return ret; > > > +} > > > + > > > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > > > unsigned int flags) > > > { > > > - return -EOPNOTSUPP; > > > + unsigned long irq_flags; > > > + struct hv_tlb_flush *flush = this_cpu(hv_input_page); > > > + uint64_t ret; > > > + unsigned int order = flags & FLUSH_ORDER_MASK; > > > + unsigned int max_gvas; > > > + > > > + ASSERT(flush); > > > + ASSERT(!cpumask_empty(mask)); > > > + > > > + local_irq_save(irq_flags); > > > + > > > + flush->address_space = 0; > > > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > > + flush->processor_mask = 0; > > > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > > > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > > + > > > + if ( cpumask_equal(mask, &cpu_online_map) ) > > > + flush->flags |= HV_FLUSH_ALL_PROCESSORS; > > > + else > > > + { > > > + int cpu; > > > > unsigned int. > > > > I picked int here and above because all the cpumask functions return > int. I don't mind changing it to unsigned int -- it makes no practical > difference. Those should likely return unsigned ints also, as I don't think cpumask can return errors. 
I prefer unsigned int, since negative cpu values make no sense. > > > + > > > + /* > > > + * Normally VP indices are in ascending order and match Xen's > > > + * idea of CPU ids. Check the last index to see if VP index is > > > + * >= 64. If so, we can skip setting up parameters for > > > + * non-applicable hypercalls without looking further. > > > + */ > > > + if ( hv_vp_index(cpumask_last(mask)) >= 64 ) > > > + goto do_ex_hypercall; > > > + > > > + for_each_cpu ( cpu, mask ) > > > + { > > > + uint32_t vpid = hv_vp_index(cpu); > > > + > > > + if ( vpid > ms_hyperv.max_vp_index ) > > > + { > > > + local_irq_restore(irq_flags); > > > + return -ENXIO; > > > + } > > > + > > > + if ( vpid >= 64 ) > > > + goto do_ex_hypercall; > > > + > > > + __set_bit(vpid, &flush->processor_mask); > > > + } > > > + } > > > + > > > + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); > > > + > > > + /* > > > + * Flush the entire address space if va is NULL or if there is not > > > + * enough space for gva_list. > > > + */ > > > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > > > + ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, > > > + virt_to_maddr(flush), 0); > > > + else > > > + { > > > + unsigned int gvas = fill_gva_list(flush->gva_list, va, order); > > > + > > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0, > > > + virt_to_maddr(flush), 0); > > > + } > > > + > > > + goto done; > > > + > > > + do_ex_hypercall: > > > + ret = flush_tlb_ex(mask, va, flags); > > > + > > > + done: > > > + local_irq_restore(irq_flags); > > > + > > > + return ret & HV_HYPERCALL_RESULT_MASK; > > > > Will this return an error code that uses the same space as Xen's errno > > values? > > > > No, it won't. It returns Hyper-V's status code (0 still means success). > > I didn't think that was a big deal because non-zero values meant errors. > And the upper layer didn't care about the exact error values (yet). 
Hm, I would rather have this return an error value in the errno.h range. ie: return ret & HV_HYPERCALL_RESULT_MASK ? -EINVAL : 0; Or something along this lines, but long term you will need some kind of mapping between HyperV and Xen error codes IMO. Thanks, Roger.
On Thu, Feb 13, 2020 at 01:41:27PM +0100, Roger Pau Monné wrote: > On Thu, Feb 13, 2020 at 12:20:33PM +0000, Wei Liu wrote: > > On Wed, Feb 12, 2020 at 06:43:47PM +0100, Roger Pau Monné wrote: > > > On Wed, Feb 12, 2020 at 04:09:18PM +0000, Wei Liu wrote: > > > > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, > > > > + unsigned int flags) > > > > +{ > > > > + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); > > > > + int nr_banks; > > > > + unsigned int max_gvas; > > > > + unsigned int order = flags & FLUSH_ORDER_MASK; > > > > + uint64_t ret; > > > > + > > > > + ASSERT(flush); > > > > + ASSERT(!local_irq_is_enabled()); > > > > > > Can you turn this into an if condition with ASSERT_UNREACHABLE and > > > return ~0ULL? (as I think that signals an error). > > > > > > > There is no need for that. This function will always be internal to > > Hyper-V in the foreseeable future. If it is ever called with IRQ enabled > > something is wrong with the code. > > But iff it ever manages to be called violating one of those conditions > things will go badly I assume? > > It would be better to stay on the safe side and simply return an error > when the conditions are no meet, and assert in the debug build. OK. 
> > > > > > > + > > > > + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) > > > > + return ~0ULL; > > > > + > > > > + flush->address_space = 0; > > > > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > > > + if ( !(flags & FLUSH_TLB_GLOBAL) ) > > > > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > > > + > > > > + flush->hv_vp_set.valid_bank_mask = 0; > > > > + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; > > > > + > > > > + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask); > > > > + if ( nr_banks < 0 ) > > > > + return ~0ULL; > > > > + > > > > + max_gvas = > > > > + (PAGE_SIZE - sizeof(*flush) - nr_banks * > > > > + sizeof(flush->hv_vp_set.bank_contents[0])) / > > > > + sizeof(uint64_t); /* gva is represented as uint64_t */ > > > > + > > > > + /* > > > > + * Flush the entire address space if va is NULL or if there is not > > > > + * enough space for gva_list. > > > > + */ > > > > + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) > > > > + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, > > > > + nr_banks, virt_to_maddr(flush), 0); > > > > + else > > > > + { > > > > + uint64_t *gva_list = (uint64_t *)flush + sizeof(*flush) + nr_banks; > > > > > > Don't you need nr_banks * sizeof(flush->hv_vp_set.bank_contents) in > > > order to calculate the position of the gva_list? > > > > > > > The pointer arithmetic is done on uint64_t pointers so it already takes > > into account sizeof(bank_contents[0]). > > Oh, then the sizeof(*flush) should be divided by sizeof(uint64_t)? > Yes. I think so. Thanks for catching this. [...] > > > > + do_ex_hypercall: > > > > + ret = flush_tlb_ex(mask, va, flags); > > > > + > > > > + done: > > > > + local_irq_restore(irq_flags); > > > > + > > > > + return ret & HV_HYPERCALL_RESULT_MASK; > > > > > > Will this return an error code that uses the same space as Xen's errno > > > values? > > > > > > > No, it won't. 
It returns Hyper-V's status code (0 still means success). > > > > I didn't think that was a big deal because non-zero values meant errors. > > And the upper layer didn't care about the exact error values (yet). > > Hm, I would rather have this return an error value in the errno.h > range. ie: > > return ret & HV_HYPERCALL_RESULT_MASK ? -EINVAL : 0; > Sure this can be done. I would use ENXIO rather than EINVAL though. > Or something along this lines, but long term you will need some kind > of mapping between HyperV and Xen error codes IMO. > Yes. When we need more sophisticated handling of error codes. Wei. > Thanks, Roger.
diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile index 18902c33e9..0e39410968 100644 --- a/xen/arch/x86/guest/hyperv/Makefile +++ b/xen/arch/x86/guest/hyperv/Makefile @@ -1,2 +1,3 @@ obj-y += hyperv.o obj-y += tlb.o +obj-y += util.o diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h index 78e52f74ce..311f060495 100644 --- a/xen/arch/x86/guest/hyperv/private.h +++ b/xen/arch/x86/guest/hyperv/private.h @@ -24,12 +24,21 @@ #include <xen/cpumask.h> #include <xen/percpu.h> +#include <xen/types.h> DECLARE_PER_CPU(void *, hv_input_page); DECLARE_PER_CPU(void *, hv_vp_assist); DECLARE_PER_CPU(uint32_t, hv_vp_index); +static inline uint32_t hv_vp_index(int cpu) +{ + return per_cpu(hv_vp_index, cpu); +} + int hyperv_flush_tlb(const cpumask_t *mask, const void *va, unsigned int flags); +/* Returns number of banks, -ev if error */ +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask); + #endif /* __XEN_HYPERV_PRIVIATE_H__ */ diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c index 48f527229e..99b789d9e9 100644 --- a/xen/arch/x86/guest/hyperv/tlb.c +++ b/xen/arch/x86/guest/hyperv/tlb.c @@ -19,15 +19,185 @@ * Copyright (c) 2020 Microsoft. */ +#include <xen/cpu.h> #include <xen/cpumask.h> #include <xen/errno.h> +#include <asm/guest/hyperv.h> +#include <asm/guest/hyperv-hcall.h> +#include <asm/guest/hyperv-tlfs.h> + #include "private.h" +/* + * It is possible to encode up to 4096 pages using the lower 12 bits + * in an element of gva_list + */ +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) +#define ORDER_TO_BYTES(order) ((1ul << (order)) * PAGE_SIZE) + +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, + unsigned int order) +{ + unsigned long start = (unsigned long)va; + unsigned long end = start + ORDER_TO_BYTES(order) - 1; + unsigned int n = 0; + + do { + unsigned long remain = end > start ? 
end - start : 0; + + gva_list[n] = start & PAGE_MASK; + + /* + * Use lower 12 bits to encode the number of additional pages + * to flush + */ + if ( remain >= HV_TLB_FLUSH_UNIT ) + { + gva_list[n] |= ~PAGE_MASK; + start += HV_TLB_FLUSH_UNIT; + } + else if ( remain ) + { + gva_list[n] |= (remain - 1) >> PAGE_SHIFT; + start = end; + } + + n++; + } while ( start < end ); + + return n; +} + +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, + unsigned int flags) +{ + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); + int nr_banks; + unsigned int max_gvas; + unsigned int order = flags & FLUSH_ORDER_MASK; + uint64_t ret; + + ASSERT(flush); + ASSERT(!local_irq_is_enabled()); + + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) + return ~0ULL; + + flush->address_space = 0; + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + if ( !(flags & FLUSH_TLB_GLOBAL) ) + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; + + flush->hv_vp_set.valid_bank_mask = 0; + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask); + if ( nr_banks < 0 ) + return ~0ULL; + + max_gvas = + (PAGE_SIZE - sizeof(*flush) - nr_banks * + sizeof(flush->hv_vp_set.bank_contents[0])) / + sizeof(uint64_t); /* gva is represented as uint64_t */ + + /* + * Flush the entire address space if va is NULL or if there is not + * enough space for gva_list. 
+ */ + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, + nr_banks, virt_to_maddr(flush), 0); + else + { + uint64_t *gva_list = (uint64_t *)flush + sizeof(*flush) + nr_banks; + unsigned int gvas = fill_gva_list(gva_list, va, order); + + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, + gvas, nr_banks, virt_to_maddr(flush), 0); + } + + return ret; +} + int hyperv_flush_tlb(const cpumask_t *mask, const void *va, unsigned int flags) { - return -EOPNOTSUPP; + unsigned long irq_flags; + struct hv_tlb_flush *flush = this_cpu(hv_input_page); + uint64_t ret; + unsigned int order = flags & FLUSH_ORDER_MASK; + unsigned int max_gvas; + + ASSERT(flush); + ASSERT(!cpumask_empty(mask)); + + local_irq_save(irq_flags); + + flush->address_space = 0; + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = 0; + if ( !(flags & FLUSH_TLB_GLOBAL) ) + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; + + if ( cpumask_equal(mask, &cpu_online_map) ) + flush->flags |= HV_FLUSH_ALL_PROCESSORS; + else + { + int cpu; + + /* + * Normally VP indices are in ascending order and match Xen's + * idea of CPU ids. Check the last index to see if VP index is + * >= 64. If so, we can skip setting up parameters for + * non-applicable hypercalls without looking further. + */ + if ( hv_vp_index(cpumask_last(mask)) >= 64 ) + goto do_ex_hypercall; + + for_each_cpu ( cpu, mask ) + { + uint32_t vpid = hv_vp_index(cpu); + + if ( vpid > ms_hyperv.max_vp_index ) + { + local_irq_restore(irq_flags); + return -ENXIO; + } + + if ( vpid >= 64 ) + goto do_ex_hypercall; + + __set_bit(vpid, &flush->processor_mask); + } + } + + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); + + /* + * Flush the entire address space if va is NULL or if there is not + * enough space for gva_list. 
+ */ + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas ) + ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, + virt_to_maddr(flush), 0); + else + { + unsigned int gvas = fill_gva_list(flush->gva_list, va, order); + + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0, + virt_to_maddr(flush), 0); + } + + goto done; + + do_ex_hypercall: + ret = flush_tlb_ex(mask, va, flags); + + done: + local_irq_restore(irq_flags); + + return ret & HV_HYPERCALL_RESULT_MASK; } /* diff --git a/xen/arch/x86/guest/hyperv/util.c b/xen/arch/x86/guest/hyperv/util.c new file mode 100644 index 0000000000..9d0b5f4a46 --- /dev/null +++ b/xen/arch/x86/guest/hyperv/util.c @@ -0,0 +1,72 @@ +/****************************************************************************** + * arch/x86/guest/hyperv/util.c + * + * Hyper-V utility functions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (c) 2020 Microsoft. 
+ */ + +#include <xen/cpu.h> +#include <xen/cpumask.h> +#include <xen/errno.h> + +#include <asm/guest/hyperv.h> +#include <asm/guest/hyperv-tlfs.h> + +#include "private.h" + +int cpumask_to_vpset(struct hv_vpset *vpset, + const cpumask_t *mask) +{ + int nr = 1, cpu, vcpu_bank, vcpu_offset; + int max_banks = ms_hyperv.max_vp_index / 64; + + /* Up to 64 banks can be represented by valid_bank_mask */ + if ( max_banks >= 64 ) + return -1; + + /* Clear all banks to avoid flushing unwanted CPUs */ + for ( vcpu_bank = 0; vcpu_bank <= max_banks; vcpu_bank++ ) + vpset->bank_contents[vcpu_bank] = 0; + + vpset->valid_bank_mask = 0; + + for_each_cpu ( cpu, mask ) + { + int vcpu = hv_vp_index(cpu); + + vcpu_bank = vcpu / 64; + vcpu_offset = vcpu % 64; + + __set_bit(vcpu_offset, &vpset->bank_contents[vcpu_bank]); + __set_bit(vcpu_bank, &vpset->valid_bank_mask); + + if ( vcpu_bank >= nr ) + nr = vcpu_bank + 1; + } + + return nr; +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */
Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage of several hypercalls: * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX Pick the most efficient hypercalls available. Signed-off-by: Wei Liu <liuwe@microsoft.com> --- xen/arch/x86/guest/hyperv/Makefile | 1 + xen/arch/x86/guest/hyperv/private.h | 9 ++ xen/arch/x86/guest/hyperv/tlb.c | 172 +++++++++++++++++++++++++++- xen/arch/x86/guest/hyperv/util.c | 72 ++++++++++++ 4 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 xen/arch/x86/guest/hyperv/util.c