@@ -4047,17 +4047,12 @@ static void hvm_s3_resume(struct domain *d)
}
}
-static bool always_flush(void *ctxt, struct vcpu *v)
-{
- return true;
-}
-
static int hvmop_flush_tlb_all(void)
{
if ( !is_hvm_domain(current->domain) )
return -EINVAL;
- return paging_flush_tlb(always_flush, NULL) ? 0 : -ERESTART;
+ return paging_flush_tlb(NULL) ? 0 : -ERESTART;
}
static int hvmop_set_evtchn_upcall_vector(
@@ -574,13 +574,6 @@ static void vpmask_fill(struct hypercall_vpmask *vpmask)
bitmap_fill(vpmask->mask, HVM_MAX_VCPUS);
}
-static bool vpmask_test(const struct hypercall_vpmask *vpmask,
- unsigned int vp)
-{
- ASSERT(vp < HVM_MAX_VCPUS);
- return test_bit(vp, vpmask->mask);
-}
-
static unsigned int vpmask_first(const struct hypercall_vpmask *vpmask)
{
return find_first_bit(vpmask->mask, HVM_MAX_VCPUS);
@@ -669,17 +662,6 @@ static uint16_t hv_vpset_to_vpmask(const struct hv_vpset *set,
#undef NR_VPS_PER_BANK
}
-/*
- * Windows should not issue the hypercalls requiring this callback in the
- * case where vcpu_id would exceed the size of the mask.
- */
-static bool need_flush(void *ctxt, struct vcpu *v)
-{
- struct hypercall_vpmask *vpmask = ctxt;
-
- return vpmask_test(vpmask, v->vcpu_id);
-}
-
union hypercall_input {
uint64_t raw;
struct {
@@ -714,6 +696,7 @@ static int hvcall_flush(const union hypercall_input *input,
uint64_t flags;
uint64_t vcpu_mask;
} input_params;
+ unsigned long *vcpu_bitmap;
/* These hypercalls should never use the fast-call convention. */
if ( input->fast )
@@ -730,18 +713,19 @@ static int hvcall_flush(const union hypercall_input *input,
* so err on the safe side.
*/
if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
- vpmask_fill(vpmask);
+ vcpu_bitmap = NULL;
else
{
vpmask_empty(vpmask);
vpmask_set(vpmask, 0, input_params.vcpu_mask);
+ vcpu_bitmap = vpmask->mask;
}
/*
* A false return means that another vcpu is currently trying
* a similar operation, so back off.
*/
- if ( !paging_flush_tlb(need_flush, vpmask) )
+ if ( !paging_flush_tlb(vcpu_bitmap) )
return -ERESTART;
output->rep_complete = input->rep_count;
@@ -760,6 +744,7 @@ static int hvcall_flush_ex(const union hypercall_input *input,
uint64_t flags;
struct hv_vpset set;
} input_params;
+ unsigned long *vcpu_bitmap;
/* These hypercalls should never use the fast-call convention. */
if ( input->fast )
@@ -771,7 +756,7 @@ static int hvcall_flush_ex(const union hypercall_input *input,
return -EINVAL;
if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
- vpmask_fill(vpmask);
+ vcpu_bitmap = NULL;
else
{
union hypercall_vpset *vpset = &this_cpu(hypercall_vpset);
@@ -807,13 +792,15 @@ static int hvcall_flush_ex(const union hypercall_input *input,
rc = hv_vpset_to_vpmask(set, vpmask);
if ( rc )
return rc;
+
+ vcpu_bitmap = vpmask->mask;
}
/*
* A false return means that another vcpu is currently trying
* a similar operation, so back off.
*/
- if ( !paging_flush_tlb(need_flush, vpmask) )
+ if ( !paging_flush_tlb(vcpu_bitmap) )
return -ERESTART;
output->rep_complete = input->rep_count;
@@ -705,8 +705,13 @@ static void dummy_flush(void *data)
{
}
-static bool flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
- void *ctxt)
+static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap)
+{
+ return !vcpu_bitmap || test_bit(v->vcpu_id, vcpu_bitmap);
+}
+
+/* Flush TLB of selected vCPUs. NULL for all. */
+static bool flush_tlb(const unsigned long *vcpu_bitmap)
{
static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
cpumask_t *mask = &this_cpu(flush_cpumask);
@@ -721,7 +726,7 @@ static bool flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
{
unsigned int cpu;
- if ( !flush_vcpu(ctxt, v) )
+ if ( !flush_vcpu(v, vcpu_bitmap) )
continue;
hvm_asid_flush_vcpu(v);
@@ -3069,9 +3069,13 @@ static void sh_clean_dirty_bitmap(struct domain *d)
}
-/* Fluhs TLB of selected vCPUs. */
-bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
- void *ctxt)
+static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap)
+{
+ return !vcpu_bitmap || test_bit(v->vcpu_id, vcpu_bitmap);
+}
+
+/* Flush TLB of selected vCPUs. NULL for all. */
+bool shadow_flush_tlb(const unsigned long *vcpu_bitmap)
{
static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
cpumask_t *mask = &this_cpu(flush_cpumask);
@@ -3084,12 +3088,12 @@ bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
/* Pause all other vcpus. */
for_each_vcpu ( d, v )
- if ( v != current && flush_vcpu(ctxt, v) )
+ if ( v != current && flush_vcpu(v, vcpu_bitmap) )
vcpu_pause_nosync(v);
/* Now that all VCPUs are signalled to deschedule, we wait... */
for_each_vcpu ( d, v )
- if ( v != current && flush_vcpu(ctxt, v) )
+ if ( v != current && flush_vcpu(v, vcpu_bitmap) )
while ( !vcpu_runnable(v) && v->is_running )
cpu_relax();
@@ -3103,7 +3107,7 @@ bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
{
unsigned int cpu;
- if ( !flush_vcpu(ctxt, v) )
+ if ( !flush_vcpu(v, vcpu_bitmap) )
continue;
paging_update_cr3(v, false);
@@ -3118,7 +3122,7 @@ bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
/* Done. */
for_each_vcpu ( d, v )
- if ( v != current && flush_vcpu(ctxt, v) )
+ if ( v != current && flush_vcpu(v, vcpu_bitmap) )
vcpu_unpause(v);
return true;
@@ -922,8 +922,7 @@ static inline int sh_check_page_has_no_refs(struct page_info *page)
}
/* Flush the TLB of the selected vCPUs. */
-bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
- void *ctxt);
+bool shadow_flush_tlb(const unsigned long *vcpu_bitmap);
#endif /* _XEN_SHADOW_PRIVATE_H */
@@ -141,9 +141,7 @@ struct paging_mode {
void (*update_cr3 )(struct vcpu *v, int do_locking,
bool noflush);
void (*update_paging_modes )(struct vcpu *v);
- bool (*flush_tlb )(bool (*flush_vcpu)(void *ctxt,
- struct vcpu *v),
- void *ctxt);
+ bool (*flush_tlb )(const unsigned long *vcpu_bitmap);
unsigned int guest_levels;
@@ -417,11 +415,10 @@ static always_inline unsigned int paging_max_paddr_bits(const struct domain *d)
return bits;
}
-static inline bool paging_flush_tlb(bool (*flush_vcpu)(void *ctxt,
- struct vcpu *v),
- void *ctxt)
+/* Flush selected vCPUs' TLBs. NULL for all. */
+static inline bool paging_flush_tlb(const unsigned long *vcpu_bitmap)
{
- return paging_get_hostmode(current)->flush_tlb(flush_vcpu, ctxt);
+ return paging_get_hostmode(current)->flush_tlb(vcpu_bitmap);
}
#endif /* XEN_PAGING_H */
TLB flushing is a hotpath, and function pointer calls are expensive
(especially under retpoline) for what amounts to an identity transform on
the data.  Just pass the vcpu_bitmap directly.

As we use NULL for "all" rather than "none", introduce a flush_vcpu()
helper to avoid the risk of logical errors from open-coding the
expression.  This also means the viridian callers can avoid writing an
all-ones bitmap for the flushing logic to consume.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Paul Durrant <paul@xen.org>

The result even compiles smaller:

  $ ../scripts/bloat-o-meter xen-syms-before xen-syms-after
  add/remove: 0/2 grow/shrink: 0/3 up/down: 0/-125 (-125)
  Function                old     new   delta
  flush_tlb               207     205      -2
  always_flush              6       -      -6
  need_flush                9       -      -9
  do_hvm_op              2434    2418     -16
  shadow_flush_tlb        628     536     -92

but this is very much not the point of the patch.

It would be rather more efficient, in this case and probably others, for
for_each_vcpu() to iterate over d->vcpu[] rather than follow the
v->next_vcpu pointer chain (better locality of access), and also because
the vCPU id would then be the loop induction variable rather than a value
read from memory.
---
 xen/arch/x86/hvm/hvm.c               |  7 +------
 xen/arch/x86/hvm/viridian/viridian.c | 31 +++++++++----------------------
 xen/arch/x86/mm/hap/hap.c            | 11 ++++++++---
 xen/arch/x86/mm/shadow/common.c      | 18 +++++++++++-------
 xen/arch/x86/mm/shadow/private.h     |  3 +--
 xen/include/asm-x86/paging.h         | 11 ++++-------
 6 files changed, 34 insertions(+), 47 deletions(-)
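
To illustrate the aside above, here is a minimal sketch of such an
array-based walk.  This is hypothetical and not part of this patch: it
assumes Xen's existing d->vcpu[] / d->max_vcpus layout, and reuses
hvm_asid_flush_vcpu() purely as an example of the per-vCPU work:

    /*
     * Hypothetical sketch, not part of this patch.  Walking d->vcpu[]
     * keeps 'id' as the loop induction variable, avoiding both the
     * v->next_in_list pointer chase and a read of v->vcpu_id.
     */
    for ( unsigned int id = 0; id < d->max_vcpus; id++ )
    {
        struct vcpu *v = d->vcpu[id];   /* may be NULL if not created */

        if ( !v || (vcpu_bitmap && !test_bit(id, vcpu_bitmap)) )
            continue;

        hvm_asid_flush_vcpu(v);         /* example per-vCPU work */
    }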