Message ID | 20240429182823.1130436-3-andrew.cooper3@citrix.com
---|---
State | Superseded
Series | x86/xstate: Fixes to size calculations
On 29.04.2024 20:28, Andrew Cooper wrote:
> @@ -567,16 +567,51 @@ static unsigned int hw_uncompressed_size(uint64_t xcr0)
>      return size;
>  }
>
> -/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
> -unsigned int xstate_ctxt_size(u64 xcr0)
> +unsigned int xstate_uncompressed_size(uint64_t xcr0)
>  {
> +    unsigned int size = XSTATE_AREA_MIN_SIZE, i;
> +
>      if ( xcr0 == xfeature_mask )
>          return xsave_cntxt_size;
>
>      if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. */
>          return 0;
>
> -    return hw_uncompressed_size(xcr0);
> +    if ( xcr0 <= (X86_XCR0_SSE | X86_XCR0_FP) )

This is open-coded XSTATE_FP_SSE, which I wouldn't mind if ...

> +        return size;
> +
> +    /*
> +     * For the non-legacy states, search all activate states and find the
> +     * maximum offset+size.  Some states (e.g. LWP, APX_F) are out-of-order
> +     * with respect their index.
> +     */
> +    xcr0 &= ~XSTATE_FP_SSE;

... you didn't use that macro here (and once further down). IOW please
be consistent, no matter which way round.

> +    for_each_set_bit ( i, &xcr0, 63 )
> +    {
> +        unsigned int s;
> +
> +        ASSERT(xstate_offsets[i] && xstate_sizes[i]);
> +
> +        s = xstate_offsets[i] && xstate_sizes[i];

You mean + here, don't you? Then:
Reviewed-by: Jan Beulich <jbeulich@suse.com>

I'm also inclined to suggest making this the initializer of s.

Jan
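For readers without the tree open: the macro Jan refers to covers exactly the two legacy bits open-coded in the quoted check. A sketch of the definition as implied by the review, not quoted from the header:

    /* Sketch: the two legacy components, as XSTATE_FP_SSE is used above. */
    #define XSTATE_FP_SSE  (X86_XCR0_FP | X86_XCR0_SSE)

With that, `xcr0 &= ~XSTATE_FP_SSE` strips the legacy components before the per-component scan.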
On 02/05/2024 1:19 pm, Jan Beulich wrote:
> On 29.04.2024 20:28, Andrew Cooper wrote:
>> @@ -567,16 +567,51 @@ static unsigned int hw_uncompressed_size(uint64_t xcr0)
>>      return size;
>>  }
>>
>> -/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
>> -unsigned int xstate_ctxt_size(u64 xcr0)
>> +unsigned int xstate_uncompressed_size(uint64_t xcr0)
>>  {
>> +    unsigned int size = XSTATE_AREA_MIN_SIZE, i;
>> +
>>      if ( xcr0 == xfeature_mask )
>>          return xsave_cntxt_size;
>>
>>      if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. */
>>          return 0;
>>
>> -    return hw_uncompressed_size(xcr0);
>> +    if ( xcr0 <= (X86_XCR0_SSE | X86_XCR0_FP) )
> This is open-coded XSTATE_FP_SSE, which I wouldn't mind if ...
>
>> +        return size;
>> +
>> +    /*
>> +     * For the non-legacy states, search all activate states and find the
>> +     * maximum offset+size.  Some states (e.g. LWP, APX_F) are out-of-order
>> +     * with respect their index.
>> +     */
>> +    xcr0 &= ~XSTATE_FP_SSE;
> ... you didn't use that macro here (and once further down). IOW please
> be consistent, no matter which way round.

Oh yes - that's a consequence of these hunks having been written 3 years
apart.

It's important for the first one (logical comparison against a bitmap)
that it's split apart.

>
>> +    for_each_set_bit ( i, &xcr0, 63 )
>> +    {
>> +        unsigned int s;
>> +
>> +        ASSERT(xstate_offsets[i] && xstate_sizes[i]);
>> +
>> +        s = xstate_offsets[i] && xstate_sizes[i];
> You mean + here, don't you?

Yes I do... That was a victim of a last-minute refactor.

It also shows that even the cross-check with hardware isn't terribly
effective. More on that in the other thread.

> Then:
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> I'm also inclined to suggest making this the initializer of s.

Hmm, good point. Will change.

Thanks,

~Andrew
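Taking Jan's two points together (the `&&` that should be a `+`, and folding the sum into the initializer), the loop presumably ends up along these lines. This is only a sketch of the fix-up discussed above, not the committed code, with identifiers as in the quoted patch:

    for_each_set_bit ( i, &xcr0, 63 )
    {
        /* Offset plus size (not logical AND), used directly as initializer. */
        unsigned int s = xstate_offsets[i] + xstate_sizes[i];

        ASSERT(xstate_offsets[i] && xstate_sizes[i]);

        size = max(size, s);
    }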
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 9a72d57333e9..c2f2016ed45a 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -833,7 +833,7 @@ long arch_do_domctl(
         uint32_t offset = 0;

 #define PV_XSAVE_HDR_SIZE (2 * sizeof(uint64_t))
-#define PV_XSAVE_SIZE(xcr0) (PV_XSAVE_HDR_SIZE + xstate_ctxt_size(xcr0))
+#define PV_XSAVE_SIZE(xcr0) (PV_XSAVE_HDR_SIZE + xstate_uncompressed_size(xcr0))

         ret = -ESRCH;
         if ( (evc->vcpu >= d->max_vcpus) ||
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 9594e0a5c530..9e677d891d6c 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1191,7 +1191,7 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, NULL, hvm_load_cpu_ctxt, 1,

 #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \
                                            save_area) + \
-                                  xstate_ctxt_size(xcr0))
+                                  xstate_uncompressed_size(xcr0))

 static int cf_check hvm_save_cpu_xsave_states(
     struct vcpu *v, hvm_domain_context_t *h)
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index c08c267884f0..f5115199d4f9 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -107,7 +107,7 @@ void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size);
 void xstate_free_save_area(struct vcpu *v);
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(struct cpuinfo_x86 *c);
-unsigned int xstate_ctxt_size(u64 xcr0);
+unsigned int xstate_uncompressed_size(uint64_t xcr0);

 static inline uint64_t xgetbv(unsigned int index)
 {
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 99cedb4f5e24..a94f4025fce5 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -183,7 +183,7 @@ void expand_xsave_states(const struct vcpu *v, void *dest, unsigned int size)
     /* Check there is state to serialise (i.e. at least an XSAVE_HDR) */
     BUG_ON(!v->arch.xcr0_accum);
     /* Check there is the correct room to decompress into. */
-    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));
+    BUG_ON(size != xstate_uncompressed_size(v->arch.xcr0_accum));

     if ( !(xstate->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) )
     {
@@ -245,7 +245,7 @@ void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size)
     u64 xstate_bv, valid;

     BUG_ON(!v->arch.xcr0_accum);
-    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));
+    BUG_ON(size != xstate_uncompressed_size(v->arch.xcr0_accum));
     ASSERT(!xsave_area_compressed(src));

     xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv;
@@ -567,16 +567,51 @@ static unsigned int hw_uncompressed_size(uint64_t xcr0)
     return size;
 }

-/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
-unsigned int xstate_ctxt_size(u64 xcr0)
+unsigned int xstate_uncompressed_size(uint64_t xcr0)
 {
+    unsigned int size = XSTATE_AREA_MIN_SIZE, i;
+
     if ( xcr0 == xfeature_mask )
         return xsave_cntxt_size;

     if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. */
         return 0;

-    return hw_uncompressed_size(xcr0);
+    if ( xcr0 <= (X86_XCR0_SSE | X86_XCR0_FP) )
+        return size;
+
+    /*
+     * For the non-legacy states, search all activate states and find the
+     * maximum offset+size.  Some states (e.g. LWP, APX_F) are out-of-order
+     * with respect their index.
+     */
+    xcr0 &= ~XSTATE_FP_SSE;
+    for_each_set_bit ( i, &xcr0, 63 )
+    {
+        unsigned int s;
+
+        ASSERT(xstate_offsets[i] && xstate_sizes[i]);
+
+        s = xstate_offsets[i] && xstate_sizes[i];
+
+        size = max(size, s);
+    }
+
+    /* In debug builds, cross-check our calculation with hardware. */
+    if ( IS_ENABLED(CONFIG_DEBUG) )
+    {
+        unsigned int hwsize;
+
+        xcr0 |= XSTATE_FP_SSE;
+        hwsize = hw_uncompressed_size(xcr0);
+
+        if ( size != hwsize )
+            printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n",
+                        __func__, xcr0, size, hwsize);
+        size = hwsize;
+    }
+
+    return size;
 }

 static bool valid_xcr0(uint64_t xcr0)
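As background for the "out-of-order" remark in the new comment: the uncompressed size is the maximum of offset+size over all enabled components, and a higher component index does not guarantee a higher end offset. Below is a small self-contained C illustration with made-up (offset, size) pairs; the real values come from CPUID leaf 0xd, and the component numbers are only loosely modelled on AVX, PKRU and LWP:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical per-component (offset, size) pairs, indexed by bit. */
        struct { unsigned int offset, size; } comp[63] = {
            [2]  = {  576, 256 },   /* AVX-like: ends at 832 */
            [9]  = { 2688,   8 },   /* PKRU-like: ends at 2696 */
            [62] = {  832, 128 },   /* LWP-like: highest index, ends at 960 */
        };
        unsigned int i, size = 576; /* legacy FP/SSE area + XSAVE header */

        for ( i = 0; i < 63; i++ )
        {
            unsigned int s = comp[i].offset + comp[i].size;

            if ( comp[i].size && s > size )
                size = s;
        }

        /* Prints 2696: component 9 ends last, despite bit 62 being set. */
        printf("uncompressed size = %u\n", size);
        return 0;
    }

Stopping at the highest set bit would wrongly report 960 here, which is why the patch scans every enabled component.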
We're soon going to need a compressed helper of the same form.

The size of the uncompressed image depends on the single element with the
largest offset+size.  Sadly this isn't always the element with the largest
index.

Retain the cross-check with hardware in debug builds, but forgo it in normal
builds.  In particular, this means that the migration paths don't need to
mess with XCR0 just to sanity check the buffer size.

No practical change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>

v2:
 * Scan all features.  LWP/APX_F are out-of-order.
---
 xen/arch/x86/domctl.c             |  2 +-
 xen/arch/x86/hvm/hvm.c            |  2 +-
 xen/arch/x86/include/asm/xstate.h |  2 +-
 xen/arch/x86/xstate.c             | 45 +++++++++++++++++++++++++++----
 4 files changed, 43 insertions(+), 8 deletions(-)